commit 28faf348c573b90078658ebf1c3fb1f7e268f0e1 Author: lorenzo Date: Mon May 13 11:41:57 2019 +0200 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6414f31 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea/ +data/ +.DS_store +__pycache__ diff --git a/README.md b/README.md new file mode 100644 index 0000000..4da501b --- /dev/null +++ b/README.md @@ -0,0 +1,140 @@ +# Monoloco + +> We tackle the fundamentally ill-posed problem of 3D human localization from monocular RGB images. +Driven by the limitation of neural networks outputting point estimates, +we address the ambiguity in the task with a new neural network predicting confidence intervals through a +loss function based on the Laplace distribution. +Our architecture is a light-weight feed-forward neural network which predicts the 3D coordinates given 2D human pose. +The design is particularly well suited for small training data and cross-dataset generalization. +Our experiments show that (i) we outperform state-of-the art results on KITTI and nuScenes datasets, +(ii) even outperform stereo for far-away pedestrians, and (iii) estimate meaningful confidence intervals. +We further share insights on our model of uncertainty in case of limited observation and out-of-distribution samples. + +``` +@article{bertoni2019monoloco, + title={MonoLoco: Monocular 3D Pedestrian Localization and Uncertainty Estimation}, + author={Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre}, + journal={arXiv preprint arXiv:xxxx}, + year={2019} +} +``` + +Add link paper +![overview_paper](docs/pull.png) +# Setup + +### Install +Python 3.6 or 3.7 is required for nuScenes development kit. Python 3 is required for openpifpaf. +All details for Pifpaf pose detector at [openpifpaf](https://github.com/vita-epfl/openpifpaf). + + +``` +pip install nuscenes-devkit openpifpaf +``` +### Data structure + + Data + ├── arrays + ├── models + ├── baselines + ├── logs + + +Run the following to create the folders: +``` +mkdir data +cd data +mkdir arrays models baselines logs +``` + +### Pre-trained Models +* Download a MonoLoco pre-trained model from Google Drive: ADD LINK and save it in `data/models` +* Download a Pifpaf pre-trained model from [openpifpaf](https://github.com/vita-epfl/openpifpaf) project +and save it into `data/models` + +### Baselines +Download KITTI ground truth txt files from Google Drive ADD link and unzip them in `data/baselines` +The zip file also contains nuScenes and KITTI splits for training and validations as well as detections +from other baselines (more details in Eval section) + + +# Interfaces +All the commands are run through a main file called `main.py` using subparsers. +To check all the commands for the parser and the subparsers run: + +* `python3 src/main.py --help` +* `python3 src/main.py prep --help` +* `python3 src/main.py predict --help` +* `python3 src/main.py train --help` +* `python3 src/main.py eval --help` + + +# Predict +The predict script receives an image (or an entire folder using glob expressions), +calls PifPaf for 2d human pose detection over the image +and runs Monoloco for 3d location of the detected poses. +The command `--networks` defines if saving pifpaf outputs, MonoLoco outputs or both. +You can check all commands for Pifpaf at [openpifpaf](https://github.com/vita-epfl/openpifpaf). 
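+For example, to save both the PifPaf json output and the MonoLoco combined visualization for a single image
+(the model path below is illustrative, point it to the pre-trained model downloaded during setup):
+```
+python3 src/main.py predict --networks pifpaf monoloco --glob docs/002282.png --output_types json combined --model data/models/base_model.pickle
+```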
+ + +Output options include json files and/or visualization of the predictions on the image in *frontal mode*, +*birds-eye-view mode* or *combined mode* and can be specified with `--output_types` + + +### Ground truth matching +* In case you provide a ground-truth json file to compare the predictions of MonoLoco, + the script will match every detection using Intersection over Union metric. + The ground truth file can be generated using the subparser `prep` and called with the command `--path_gt`. + Check preprocess section for more details. + +* In case you don't provide a ground-truth file, the script will look for a predefined path. +If it does not find the file, it will generate images +with all the predictions without ground-truth matching. + +Below an example with and without ground-truth matching. They have been created (adding or removing `--path_gt`) with: +`python src/main.py predict --networks monoloco --glob docs/002282.png --output_types combined --scale 2 --y_scale 1.85 +--model data/models/base_model.pickle --n_dropout 100 --z_max 30` + +With ground truth matching (only matching people): +![predict_ground_truth](docs/002282.png.combined_1.png) + +Without ground_truth matching (all the detected people): +![predict_no_matching](docs/002282.png.combined_2.png) + +# Preprocess +### Input joints for training +MonoLoco is trained using 2D human pose joints detected by pifpaf and matched with the ground truth location provided by +nuScenes or KITTI Dataset. To create the joints run: `python src/main.py prep` specifying: +1. `--dir_ann` annotation directory containing pifpaf joints. You can create them running the predict script and using +`--network pifpaf`. +2. `--dataset` Which dataset to preprocess. For nuscenes, all three versions of the +dataset are supported: nuscenes_mini, nuscenes, nuscenes_teaser. + +### Ground truth file for evaluation +The preprocessing script also outputs a second json file called **names.json** which provide a dictionary indexed +by the image name to easily access ground truth files for evaluation purposes. + + +### Train +Provide the json file containing the preprocess joints (**joints.json**) as argument. + +As simple as `python3 src/main.py --train --joints 'data/arrays/joints.json` + +All the hyperparameters options can be checked at `python3 src/main.py train --help`. + +### Hyperparameters tuning +Random search in log space is provided. An example: `python3 src/main.py train --hyp --multiplier 10 --r_seed 1`. +One iteration of the multiplier includes 6 runs. + +# Eval +Evaluate performances of the trained model on KITTI or Nuscenes Dataset. Compare them with other monocular +and stereo Baselines: +[Mono3D](http://3dimage.ee.tsinghua.edu.cn/cxz/mono3d), +[3DOP](https://xiaozhichen.github.io/papers/nips15chen.pdf), +[MonoDepth](https://arxiv.org/abs/1609.03677) and our +[Geometrical Baseline](src/eval/geom_baseline.py). 
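+
+The geometric baseline relies on the pinhole relation between the metric height of a body segment and its height in pixels.
+Below is a minimal sketch of the idea, assuming the average shoulder-hip height of 0.48 m used in the code
+(the helper is illustrative; `src/eval/geom_baseline.py` additionally averages left/right joints and solves a small least-squares system in normalized camera coordinates):
+```python
+import numpy as np
+
+def approx_distance(uv_shoulder, uv_hip, kk, segment_height=0.48):
+    """Back-project both keypoints onto the z=1 plane and return the depth
+    that makes the segment span `segment_height` meters."""
+    kk_inv = np.linalg.inv(np.array(kk))
+    p_1 = kk_inv @ np.array([uv_shoulder[0], uv_shoulder[1], 1.0])
+    p_2 = kk_inv @ np.array([uv_hip[0], uv_hip[1], 1.0])
+    dy_norm = abs(p_2[1] - p_1[1])   # segment height on the normalized image plane
+    return segment_height / dy_norm  # depth z in meters
+```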
+ +The following graph is obtained running: +`python3 src/main.py eval --dataset kitti --model data/models/base_model.pickle` +![kitti_evaluation](docs/results.png) + diff --git a/docs/002282.png b/docs/002282.png new file mode 100755 index 0000000..e81fe3b Binary files /dev/null and b/docs/002282.png differ diff --git a/docs/002282.png.combined_1.png b/docs/002282.png.combined_1.png new file mode 100644 index 0000000..830f6fa Binary files /dev/null and b/docs/002282.png.combined_1.png differ diff --git a/docs/002282.png.combined_2.png b/docs/002282.png.combined_2.png new file mode 100644 index 0000000..3e44220 Binary files /dev/null and b/docs/002282.png.combined_2.png differ diff --git a/docs/pull.png b/docs/pull.png new file mode 100644 index 0000000..c521bb3 Binary files /dev/null and b/docs/pull.png differ diff --git a/docs/results.png b/docs/results.png new file mode 100644 index 0000000..475caa4 Binary files /dev/null and b/docs/results.png differ diff --git a/src/eval/geom_baseline.py b/src/eval/geom_baseline.py new file mode 100644 index 0000000..6f1dcf1 --- /dev/null +++ b/src/eval/geom_baseline.py @@ -0,0 +1,246 @@ + +import glob +import json +import logging +import os +import numpy as np +import math +from collections import defaultdict +from utils.camera import pixel_to_camera + + +class GeomBaseline: + + def __init__(self, joints): + + # Initialize directories + self.clusters = ['10', '20', '30', '>30', 'all'] + self.average_y = 0.48 + self.joints = joints + + from utils.misc import calculate_iou + self.calculate_iou = calculate_iou + from utils.nuscenes import get_unique_tokens, split_scenes + self.get_unique_tokens = get_unique_tokens + self.split_scenes = split_scenes + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + def run(self): + """ + List of json files --> 2 lists with mean and std for each segment and the total count of instances + + For each annotation: + 1. From gt boxes calculate the height (deltaY) for the segments head, shoulder, hip, ankle + 2. From mask boxes calculate distance of people using average height of people and real pixel height + + For left-right ambiguities we chose always the average of the joints + + The joints are mapped from 0 to 16 in the following order: + ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder', 'left_elbow', + 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle', + 'right_ankle'] + + """ + cnt_tot = 0 + # Access the joints file + with open(self.joints, 'r') as ff: + dic_joints = json.load(ff) + + dic_dist = defaultdict(lambda: defaultdict(list)) + + # Calculate distances for all the segments + for phase in ['train', 'val']: + + cnt = update_distances(dic_joints[phase], dic_dist, phase, self.average_y) + cnt_tot += cnt + + dic_h_means = calculate_heights(dic_dist['heights'], mode='mean') + dic_h_stds = calculate_heights(dic_dist['heights'], mode='std') + + self.logger.info("Computed distance of {} annotations".format(cnt_tot)) + + for key in dic_h_means: + self.logger.info("Average height of segment {} is {:.2f} with a std of {:.2f}". 
+ format(key, dic_h_means[key], dic_h_stds[key])) + + errors = calculate_error(dic_dist['error']) + + for clst in self.clusters: + self.logger.info("Average distance over the val set for clst {}: {:.2f}".format(clst, errors[clst])) + + self.logger.info("Joints used: {}".format(self.joints)) + + +def update_distances(dic_fin, dic_dist, phase, average_y): + + # Loop over each annotation in the json file corresponding to the image + + cnt = 0 + for idx, kps in enumerate(dic_fin['kps']): + # Extract pixel coordinates of head, shoulder, hip, ankle and and save them + dic_uv = extract_pixel_coord(kps) + + # Convert segments from pixel coordinate to camera coordinate + kk = dic_fin['K'][idx] + z_met = dic_fin['boxes_3d'][idx][2] + + # Create a dict with all annotations in meters + dic_xyz = {key: pixel_to_camera(dic_uv[key], kk, z_met) for key in dic_uv} + + # Compute real height + dy_met = abs(dic_xyz['hip'][1] - dic_xyz['shoulder'][1]) + + # Estimate distance for a single annotation + z_met_real, _ = compute_distance_single(dic_uv['shoulder'], dic_uv['hip'], kk, average_y, + mode='real', dy_met=dy_met) + z_met_approx, _ = compute_distance_single(dic_uv['shoulder'], dic_uv['hip'], kk, average_y, + mode='average') + + # Compute distance with respect to the center of the 3D bounding box + xyz_met = np.array(dic_fin['boxes_3d'][idx][0:3]) + d_met = np.linalg.norm(xyz_met) + d_real = math.sqrt(z_met_real ** 2 + dic_fin['boxes_3d'][idx][0] ** 2 + dic_fin['boxes_3d'][idx][1] ** 2) + d_approx = math.sqrt(z_met_approx ** 2 + + dic_fin['boxes_3d'][idx][0] ** 2 + dic_fin['boxes_3d'][idx][1] ** 2) + + # if abs(d_qmet - d_real) > 1e-1: # "Error in computing distance with real height in pixels" + # aa = 5 + + # Update the dictionary with distance and heights metrics + dic_dist = update_dic_dist(dic_dist, dic_xyz, d_real, d_approx, phase) + cnt += 1 + + return cnt + + +def compute_distance_single(uv_1, uv_2, kk, average_y, mode='average', dy_met=0): + + """ + Compute distance Z of a mask annotation (solving a linear system) for 2 possible cases: + 1. knowing specific height of the annotation (head-ankle) dy_met + 2. using mean height of people (average_y) + """ + assert mode == 'average' or mode == 'real' + # Trasform into normalized camera coordinates (plane at 1m) + xyz_met_norm_1 = pixel_to_camera(uv_1, kk, 1) + xyz_met_norm_2 = pixel_to_camera(uv_2, kk, 1) + + x1 = xyz_met_norm_1[0] + y1 = xyz_met_norm_1[1] + x2 = xyz_met_norm_2[0] + y2 = xyz_met_norm_2[1] + xx = (x1 + x2) / 2 + + # Choose if solving for provided height or average one. + if mode == 'average': + cc = - average_y # Y axis goes down + else: + cc = -dy_met + + # if - 3 * average_y <= cc <= -2: + # aa = 5 + + # Solving the linear system Ax = b + Aa = np.array([[y1, 0, -xx], + [0, -y1, 1], + [y2, 0, -xx], + [0, -y2, 1]]) + + bb = np.array([cc * xx, -cc, 0, 0]).reshape(4, 1) + xx = np.linalg.lstsq(Aa, bb, rcond=None) + z_met = abs(np.float(xx[0][1])) # Abs take into account specularity behind the observer + + # Compute the absolute x and y coordinates in meters + xyz_met_1 = xyz_met_norm_1 * z_met + xyz_met_2 = xyz_met_norm_2 * z_met + + return z_met, (xyz_met_1, xyz_met_2) + + +def extract_pixel_coord(kps): + + """Extract uv coordinates from keypoints and save them in a dict """ + # For each level of height (e.g. 
5 points in the head), take the average of them + + uv_head = np.array([np.average(kps[0][0:5]), np.average(kps[1][0:5]), 1]) + uv_shoulder = np.array([np.average(kps[0][5:7]), np.average(kps[1][5:7]), 1]) + uv_hip = np.array([np.average(kps[0][11:13]), np.average(kps[1][11:13]), 1]) + uv_ankle = np.array([np.average(kps[0][15:17]), np.average(kps[1][15:17]), 1]) + + dic_uv = {'head': uv_head, 'shoulder': uv_shoulder, 'hip': uv_hip, 'ankle': uv_ankle} + + return dic_uv + + +def update_dic_dist(dic_dist, dic_xyz, d_real, d_approx, phase): + """ For every annotation in a single image, update the final dictionary""" + + # Update the dict with heights metric + if phase == 'train': + dic_dist['heights']['head'].append(np.float(dic_xyz['head'][1])) + dic_dist['heights']['shoulder'].append(np.float(dic_xyz['shoulder'][1])) + dic_dist['heights']['hip'].append(np.float(dic_xyz['hip'][1])) + dic_dist['heights']['ankle'].append(np.float(dic_xyz['ankle'][1])) + + # Update the dict with distance metrics for the test phase + if phase == 'val': + error = abs(d_real - d_approx) + + if d_real <= 10: + dic_dist['error']['10'].append(error) + elif d_real <= 20: + dic_dist['error']['20'].append(error) + elif d_real <= 30: + dic_dist['error']['30'].append(error) + else: + dic_dist['error']['>30'].append(error) + + dic_dist['error']['all'].append(error) + + return dic_dist + + +def calculate_heights(heights, mode): + """ + Compute statistics of heights based on the distance + """ + + assert mode == 'mean' or mode == 'std' or mode == 'max' + heights_fin = {} + + head_shoulder = np.array(heights['shoulder']) - np.array(heights['head']) + shoulder_hip = np.array(heights['hip']) - np.array(heights['shoulder']) + hip_ankle = np.array(heights['ankle']) - np.array(heights['hip']) + + if mode == 'mean': + heights_fin['head_shoulder'] = np.float(np.mean(head_shoulder)) * 100 + heights_fin['shoulder_hip'] = np.float(np.mean(shoulder_hip)) * 100 + heights_fin['hip_ankle'] = np.float(np.mean(hip_ankle)) * 100 + + elif mode == 'std': + heights_fin['head_shoulder'] = np.float(np.std(head_shoulder)) * 100 + heights_fin['shoulder_hip'] = np.float(np.std(shoulder_hip)) * 100 + heights_fin['hip_ankle'] = np.float(np.std(hip_ankle)) * 100 + + elif mode == 'max': + heights_fin['head_shoulder'] = np.float(np.max(head_shoulder)) * 100 + heights_fin['shoulder_hip'] = np.float(np.max(shoulder_hip)) * 100 + heights_fin['hip_ankle'] = np.float(np.max(hip_ankle)) * 100 + + return heights_fin + + +def calculate_error(dic_errors): + """ + Compute statistics of distances based on the distance + """ + + errors = {} + for clst in dic_errors: + + errors[clst] = np.float(np.mean(np.array(dic_errors[clst]))) + + return errors + diff --git a/src/eval/kitti_eval.py b/src/eval/kitti_eval.py new file mode 100644 index 0000000..4818d88 --- /dev/null +++ b/src/eval/kitti_eval.py @@ -0,0 +1,464 @@ + +import os +import math +import logging +from collections import defaultdict +import json +import copy +import datetime + + +class KittiEval: + """ + Evaluate Monoloco code on KITTI dataset and compare it with: + - Mono3D + - 3DOP + - MonoDepth + """ + + def __init__(self, show=False, thresh_iou_our=0.3, thresh_iou_m3d=0.5, thresh_conf_m3d=0.5, thresh_conf_our=0.3): + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + self.show = show + from utils.misc import get_idx_max + self.get_idx_max = get_idx_max + from utils.kitti import check_conditions, get_category + self.check_conditions = check_conditions + 
self.get_category = get_category + from visuals.results import print_results + self.print_results = print_results + + self.dir_gt = os.path.join('data', 'baselines', 'gt') + self.dir_m3d = os.path.join('data', 'baselines', 'm3d') + self.dir_3dop = os.path.join('data', 'baselines', '3dop') + self.dir_md = os.path.join('data', 'baselines', 'monodepth') + self.dir_psm = os.path.join('data', 'baselines', 'psm') + self.dir_our = os.path.join('data', 'baselines', 'monoloco') + path_val = os.path.join('data', 'baselines', 'val.txt') + dir_logs = os.path.join('data', 'logs') + assert dir_logs, "No directory to save final statistics" + + now = datetime.datetime.now() + now_time = now.strftime("%Y%m%d-%H%M")[2:] + self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json') + + assert os.path.exists(self.dir_m3d) and os.path.exists(self.dir_our) \ + and os.path.exists(self.dir_3dop) + + self.clusters = ['easy', 'moderate', 'hard', 'all', '6', '10', '15', '20', '25', '30', '40', '50', '>50'] + + self.dic_thresh_iou = {'m3d': thresh_iou_m3d, '3dop': thresh_iou_m3d, 'md': thresh_iou_our, + 'psm': thresh_iou_our, 'our': thresh_iou_our} + self.dic_thresh_conf = {'m3d': thresh_conf_m3d, '3dop': thresh_conf_m3d, 'our': thresh_conf_our} + + self.dic_cnt = defaultdict(int) + self.errors = defaultdict(lambda: defaultdict(list)) + + # Only consider validation images + set_gt = set(os.listdir(self.dir_gt)) + set_val = set() + + with open(path_val, "r") as f_val: + for line in f_val: + set_val.add(line[:-1] + '.txt') + self.list_gt = list(set_gt.intersection(set_val)) + assert self.list_gt, "No images in the folder" + + def run(self): + + self.dic_stds = defaultdict(lambda: defaultdict(list)) + dic_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))) + + cnt_gt = 0 + + # Iterate over each ground truth file in the training set + for name in self.list_gt: + if name == '004647.txt': + aa = 5 + path_gt = os.path.join(self.dir_gt, name) + path_m3d = os.path.join(self.dir_m3d, name) + path_our = os.path.join(self.dir_our, name) + path_3dop = os.path.join(self.dir_3dop, name) + path_md = os.path.join(self.dir_md, name) + path_psm = os.path.join(self.dir_psm, name) + boxes_gt = [] + truncs_gt = [] # Float from 0 to 1 + occs_gt = [] # Either 0,1,2,3 fully visible, partly occluded, largely occluded, unknown + dds_gt = [] + dic_fin = defaultdict(list) + + # Iterate over each line of the gt file and save box location and distances + with open(path_gt, "r") as f_gt: + for line_gt in f_gt: + if self.check_conditions(line_gt, mode='gt'): + truncs_gt.append(float(line_gt.split()[1])) + occs_gt.append(int(line_gt.split()[2])) + boxes_gt.append([float(x) for x in line_gt.split()[4:8]]) + loc_gt = [float(x) for x in line_gt.split()[11:14]] + dds_gt.append(math.sqrt(loc_gt[0] ** 2 + loc_gt[1] ** 2 + loc_gt[2] ** 2)) + cnt_gt += 1 + + # Extract annotations for the same file + if len(boxes_gt) > 0: + boxes_m3d, dds_m3d = self.parse_txts(path_m3d, method='m3d') + boxes_3dop, dds_3dop = self.parse_txts(path_3dop, method='3dop') + boxes_md, dds_md = self.parse_txts(path_md, method='md') + boxes_psm, dds_psm = self.parse_txts(path_psm, method='psm') + boxes_our, dds_our, stds_ale, stds_epi, kk_list, dds_geom, xyzs, xy_kps = \ + self.parse_txts(path_our, method='our') + + if len(boxes_our) > 0 and len(boxes_psm) == 0: + aa = 5 + + # Compute the error with ground truth + + self.estimate_error_base(boxes_m3d, dds_m3d, boxes_gt, dds_gt, truncs_gt, occs_gt, method='m3d') + 
self.estimate_error_base(boxes_3dop, dds_3dop, boxes_gt, dds_gt, truncs_gt, occs_gt, method='3dop') + self.estimate_error_base(boxes_md, dds_md, boxes_gt, dds_gt, truncs_gt, occs_gt, method='md') + self.estimate_error_base(boxes_psm, dds_psm, boxes_gt, dds_gt, truncs_gt, occs_gt, method='psm') + self.estimate_error_our(boxes_our, dds_our, stds_ale, stds_epi, kk_list, dds_geom, xyzs, xy_kps, + boxes_gt, dds_gt, truncs_gt, occs_gt, dic_fin, name) + + # Iterate over all the files together to find a pool of common annotations + self.compare_error(boxes_m3d, dds_m3d, boxes_3dop, dds_3dop, boxes_md, dds_md, boxes_our, dds_our, + boxes_psm, dds_psm, boxes_gt, dds_gt, truncs_gt, occs_gt, dds_geom) + + # Save statistics + for key in self.errors: + for clst in self.clusters[:-2]: # M3d and pifpaf does not have annotations above 40 meters + dic_stats['test'][key]['mean'][clst] = sum(self.errors[key][clst]) / float(len(self.errors[key][clst])) + dic_stats['test'][key]['max'][clst] = max(self.errors[key][clst]) + dic_stats['test'][key]['cnt'][clst] = len(self.errors[key][clst]) + + if key == 'our': + for clst in self.clusters[:-2]: + dic_stats['test'][key]['std_ale'][clst] = \ + sum(self.dic_stds['ale'][clst]) / float(len(self.dic_stds['ale'][clst])) + dic_stats['test'][key]['std_epi'][clst] = \ + sum(self.dic_stds['epi'][clst]) / float(len(self.dic_stds['epi'][clst])) + dic_stats['test'][key]['interval'][clst] = \ + sum(self.dic_stds['interval'][clst]) / float(len(self.dic_stds['interval'][clst])) + dic_stats['test'][key]['at_risk'][clst] = \ + sum(self.dic_stds['at_risk'][clst]) / float(len(self.dic_stds['at_risk'][clst])) + + # Print statistics + print(" Number of GT annotations: {} ".format(cnt_gt)) + for key in self.errors: + if key in ['our', 'm3d', '3dop']: + print(" Number of {} annotations with confidence >= {} : {} " + .format(key, self.dic_thresh_conf[key], self.dic_cnt[key])) + + # Include also missed annotations in the statistics + matched = len(self.errors[key]['all']) + missed = cnt_gt - matched + zeros = [0] * missed + self.errors[key]['<0.5m'].extend(zeros) + self.errors[key]['<1m'].extend(zeros) + self.errors[key]['<2m'].extend(zeros) + + for clst in self.clusters[:-9]: + print(" {} Average error in cluster {}: {:.2f} with a max error of {:.1f}, " + "for {} annotations" + .format(key, clst, dic_stats['test'][key]['mean'][clst], dic_stats['test'][key]['max'][clst], + dic_stats['test'][key]['cnt'][clst])) + + if key == 'our': + print("% of annotation inside the confidence interval: {:.1f} %, " + "of which {:.1f} % at higher risk" + .format(100 * dic_stats['test'][key]['interval'][clst], + 100 * dic_stats['test'][key]['at_risk'][clst])) + + for perc in ['<0.5m', '<1m', '<2m']: + print("{} Instances with error {}: {:.2f} %" + .format(key, perc, 100 * sum(self.errors[key][perc])/len(self.errors[key][perc]))) + + print("\n Number of matched annotations: {:.1f} %".format(100 * matched/cnt_gt)) + print("-"*100) + + # Print images + self.print_results(dic_stats, self.show) + + def parse_txts(self, path, method): + boxes = [] + dds = [] + stds_ale = [] + stds_epi = [] + confs = [] + dds_geom = [] + xyzs = [] + xy_kps = [] + + # Iterate over each line of the txt file + if method == '3dop' or method == 'm3d': + try: + with open(path, "r") as ff: + for line in ff: + if self.check_conditions(line, thresh=self.dic_thresh_conf[method], mode=method): + boxes.append([float(x) for x in line.split()[4:8]]) + loc = ([float(x) for x in line.split()[11:14]]) + dds.append(math.sqrt(loc[0] ** 2 + loc[1] ** 2 
+ loc[2] ** 2)) + self.dic_cnt[method] += 1 + return boxes, dds + + except FileNotFoundError: + return [], [] + + elif method == 'md': + try: + with open(path, "r") as ff: + for line in ff: + box = [float(x[:-1]) for x in line.split()[0:4]] + delta_h = (box[3] - box[1]) / 10 + delta_w = (box[2] - box[0]) / 10 + assert delta_h > 0 and delta_w > 0, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + boxes.append(box) + dds.append(float(line.split()[5][:-1])) + self.dic_cnt[method] += 1 + return boxes, dds + + except FileNotFoundError: + return [], [] + + elif method == 'psm': + try: + with open(path, "r") as ff: + for line in ff: + box = [float(x[:-1]) for x in line[1:-1].split(',')[0:4]] + delta_h = (box[3] - box[1]) / 10 + delta_w = (box[2] - box[0]) / 10 + assert delta_h > 0 and delta_w > 0, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + boxes.append(box) + dds.append(float(line.split()[5][:-1])) + self.dic_cnt[method] += 1 + return boxes, dds + + except FileNotFoundError: + return [], [] + + elif method == 'our': + try: + with open(path, "r") as ff: + file_lines = ff.readlines() + for line_our in file_lines[:-1]: + line_list = [float(x) for x in line_our.split()] + if self.check_conditions(line_list, thresh=self.dic_thresh_conf[method], mode=method): + boxes.append(line_list[:4]) + xyzs.append(line_list[4:7]) + dds.append(line_list[7]) + stds_ale.append(line_list[8]) + stds_epi.append(line_list[9]) + dds_geom.append(line_list[11]) + xy_kps.append(line_list[12:]) + + self.dic_cnt[method] += 1 + + kk_list = [float(x) for x in file_lines[-1].split()] + + return boxes, dds, stds_ale, stds_epi, kk_list, dds_geom, xyzs, xy_kps + + except FileNotFoundError: + return [], [], [], [], [], [], [], [] + + def estimate_error_base(self, boxes, dds, boxes_gt, dds_gt, truncs_gt, occs_gt, method): + + # Compute error (distance) and save it + boxes_gt = copy.deepcopy(boxes_gt) + dds_gt = copy.deepcopy(dds_gt) + truncs_gt = copy.deepcopy(truncs_gt) + occs_gt = copy.deepcopy(occs_gt) + + for idx, box in enumerate(boxes): + if len(boxes_gt) >= 1: + dd = dds[idx] + idx_max, iou_max = self.get_idx_max(box, boxes_gt) + cat = self.get_category(boxes_gt[idx_max], truncs_gt[idx_max], occs_gt[idx_max]) + # Update error if match is found + if iou_max > self.dic_thresh_iou[method]: + dd_gt = dds_gt[idx_max] + self.update_errors(dd, dd_gt, cat, self.errors[method]) + + boxes_gt.pop(idx_max) + dds_gt.pop(idx_max) + truncs_gt.pop(idx_max) + occs_gt.pop(idx_max) + else: + break + + def estimate_error_our(self, boxes, dds, stds_ale, stds_epi, kk_list, dds_geom, xyzs, xy_kps, + boxes_gt, dds_gt, truncs_gt, occs_gt, dic_fin, name): + + # Compute error (distance) and save it + boxes_gt = copy.deepcopy(boxes_gt) + dds_gt = copy.deepcopy(dds_gt) + truncs_gt = copy.deepcopy(truncs_gt) + occs_gt = copy.deepcopy(occs_gt) + + for idx, box in enumerate(boxes): + if len(boxes_gt) >= 1: + dd = dds[idx] + dd_geom = dds_geom[idx] + ale = stds_ale[idx] + epi = stds_epi[idx] + xyz = xyzs[idx] + xy_kp = xy_kps[idx] + idx_max, iou_max = self.get_idx_max(box, boxes_gt) + cat = self.get_category(boxes_gt[idx_max], truncs_gt[idx_max], occs_gt[idx_max]) + + # Update error if match is found + if iou_max > self.dic_thresh_iou['our']: + dd_gt = dds_gt[idx_max] + self.update_errors(dd, dd_gt, cat, self.errors['our']) + self.update_errors(dd_geom, dd_gt, cat, self.errors['geom']) + self.update_uncertainty(ale, epi, dd, dd_gt, cat) + + 
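+                # Remove the matched ground truth so it cannot be assigned to another detection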
boxes_gt.pop(idx_max) + dds_gt.pop(idx_max) + truncs_gt.pop(idx_max) + occs_gt.pop(idx_max) + + # Extract K and save it everything in a json file + dic_fin['boxes'].append(box) + dic_fin['dds_gt'].append(dd_gt) + dic_fin['dds_pred'].append(dd) + dic_fin['stds_ale'].append(ale) + dic_fin['stds_epi'].append(epi) + dic_fin['dds_geom'].append(dd_geom) + dic_fin['xyz'].append(xyz) + dic_fin['xy_kps'].append(xy_kp) + else: + break + + # kk_fin = np.array(kk_list).reshape(3, 3).tolist() + # dic_fin['K'] = kk_fin + # path_json = os.path.join(self.dir_fin, name[:-4] + '.json') + # with open(path_json, 'w') as ff: + # json.dump(dic_fin, ff) + + def compare_error(self, boxes_m3d, dds_m3d, boxes_3dop, dds_3dop, boxes_md, dds_md, boxes_our, dds_our, + boxes_psm, dds_psm, boxes_gt, dds_gt, truncs_gt, occs_gt, dds_geom): + + boxes_gt = copy.deepcopy(boxes_gt) + dds_gt = copy.deepcopy(dds_gt) + truncs_gt = copy.deepcopy(truncs_gt) + occs_gt = copy.deepcopy(occs_gt) + + for idx, box in enumerate(boxes_our): + if len(boxes_gt) >= 1: + dd_our = dds_our[idx] + dd_geom = dds_geom[idx] + idx_max, iou_max = self.get_idx_max(box, boxes_gt) + cat = self.get_category(boxes_gt[idx_max], truncs_gt[idx_max], occs_gt[idx_max]) + + idx_max_3dop, iou_max_3dop = self.get_idx_max(box, boxes_3dop) + idx_max_m3d, iou_max_m3d = self.get_idx_max(box, boxes_m3d) + idx_max_md, iou_max_md = self.get_idx_max(box, boxes_md) + # idx_max_psm, iou_max_psm = self.get_idx_max(box, boxes_psm) + iou_max_psm = 1 + + iou_min = min(iou_max_3dop, iou_max_m3d, iou_max_md, iou_max_psm) + + if iou_max >= self.dic_thresh_iou['our'] and iou_min >= self.dic_thresh_iou['m3d']: + + dd_gt = dds_gt[idx_max] + dd_3dop = dds_3dop[idx_max_3dop] + dd_m3d = dds_m3d[idx_max_m3d] + dd_md = dds_md[idx_max_md] + # dd_psm = dds_psm[idx_max_psm] + + self.update_errors(dd_3dop, dd_gt, cat, self.errors['3dop_merged']) + self.update_errors(dd_our, dd_gt, cat, self.errors['our_merged']) + self.update_errors(dd_m3d, dd_gt, cat, self.errors['m3d_merged']) + self.update_errors(dd_geom, dd_gt, cat, self.errors['geom_merged']) + self.update_errors(dd_md, dd_gt, cat, self.errors['md_merged']) + # self.update_errors(dd_psm, dd_gt, cat, self.errors['psm_merged']) + self.dic_cnt['merged'] += 1 + + boxes_gt.pop(idx_max) + dds_gt.pop(idx_max) + truncs_gt.pop(idx_max) + occs_gt.pop(idx_max) + else: + break + + def update_errors(self, dd, dd_gt, cat, errors): + + """Compute and save errors between a single box and the gt box which match""" + + diff = abs(dd - dd_gt) + clst = self.find_cluster(dd_gt, self.clusters) + errors['all'].append(diff) + errors[cat].append(diff) + errors[clst].append(diff) + + # Check if the distance is less than one or 2 meters + if diff <= 0.5: + errors['<0.5m'].append(1) + else: + errors['<0.5m'].append(0) + + if diff <= 1: + errors['<1m'].append(1) + else: + errors['<1m'].append(0) + + if diff <= 2: + errors['<2m'].append(1) + else: + errors['<2m'].append(0) + + def update_uncertainty(self, std_ale, std_epi, dd, dd_gt, cat): + + clst = self.find_cluster(dd_gt, self.clusters) + self.dic_stds['ale']['all'].append(std_ale) + self.dic_stds['ale'][clst].append(std_ale) + self.dic_stds['ale'][cat].append(std_ale) + self.dic_stds['epi']['all'].append(std_epi) + self.dic_stds['epi'][clst].append(std_epi) + self.dic_stds['epi'][cat].append(std_epi) + + # Number of annotations inside the confidence interval + if dd_gt <= dd: # Particularly dangerous instances + self.dic_stds['at_risk']['all'].append(1) + self.dic_stds['at_risk'][clst].append(1) + 
self.dic_stds['at_risk'][cat].append(1) + + if abs(dd - dd_gt) <= (std_epi): + self.dic_stds['interval']['all'].append(1) + self.dic_stds['interval'][clst].append(1) + self.dic_stds['interval'][cat].append(1) + + else: + self.dic_stds['interval']['all'].append(0) + self.dic_stds['interval'][clst].append(0) + self.dic_stds['interval'][cat].append(0) + + + else: + self.dic_stds['at_risk']['all'].append(0) + self.dic_stds['at_risk'][clst].append(0) + self.dic_stds['at_risk'][cat].append(0) + + + # self.dic_stds['at_risk']['all'].append(0) + # self.dic_stds['at_risk'][clst].append(0) + # self.dic_stds['at_risk'][cat].append(0) + + @staticmethod + def find_cluster(dd, clusters): + """Find the correct cluster. The first and the last one are not numeric""" + + for clst in clusters[4: -1]: + if dd <= int(clst): + return clst + + return clusters[-1] diff --git a/src/eval/run_kitti.py b/src/eval/run_kitti.py new file mode 100644 index 0000000..79d1b07 --- /dev/null +++ b/src/eval/run_kitti.py @@ -0,0 +1,154 @@ + +import torch +import math +import numpy as np +import os +import glob +import json +import logging +from models.architectures import TriLinear, LinearModel + + +class RunKitti: + + def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout): + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + # Set directories + self.dir_ann = dir_ann + self.average_y = 0.48 + self.n_dropout = n_dropout + self.n_samples = 100 + + list_ann = glob.glob(os.path.join(dir_ann, '*.json')) + self.dir_kk = os.path.join('data', 'baselines', 'calib') + self.dir_out = os.path.join('data', 'baselines', 'monoloco') + if not os.path.exists(self.dir_out): + os.makedirs(self.dir_out) + print("Created output directory for txt files") + + self.list_basename = [os.path.basename(x).split('.')[0] for x in list_ann] + assert self.list_basename, " Missing json annotations file to create txt files for KITTI datasets" + + # Load the model + input_size = 17 * 2 + use_cuda = torch.cuda.is_available() + self.device = torch.device("cuda:0" if use_cuda else "cpu") + self.model = LinearModel(input_size=input_size, output_size=2, linear_size=hidden_size, + p_dropout=dropout, num_stage=n_stage) + self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage)) + self.model.eval() # Default is train + self.model.to(self.device) + + # Import + from utils.misc import epistemic_variance, laplace_sampling + self.epistemic_variance = epistemic_variance + self.laplace_sampling = laplace_sampling + from visuals.printer import Printer + self.Printer = Printer + from utils.kitti import eval_geometric, get_calibration + self.eval_geometric = eval_geometric + self.get_calibration = get_calibration + + from utils.normalize import unnormalize_bi + self.unnormalize_bi = unnormalize_bi + + from utils.pifpaf import get_input_data, preprocess_pif + self.get_input_data = get_input_data + self.preprocess_pif = preprocess_pif + + # Counters + self.cnt_ann = 0 + self.cnt_file = 0 + self.cnt_no_file = 0 + + def run(self): + + # Run inference + for basename in self.list_basename: + path_calib = os.path.join(self.dir_kk, basename + '.txt') + kk, tt = self.get_calibration(path_calib) + + path_ann = os.path.join(self.dir_ann, basename + '.png.pifpaf.json') + with open(path_ann, 'r') as f: + annotations = json.load(f) + + boxes, keypoints = self.preprocess_pif(annotations) + (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = self.get_input_data(boxes, keypoints, kk) + + 
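+            # Geometric-baseline distances for each detection, using an average segment height of 0.48 m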
dds_geom, xy_centers = self.eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48) + + self.cnt_ann += len(boxes) + + inputs = torch.from_numpy(np.array(inputs)).float().to(self.device) + + if len(inputs) == 0: + self.cnt_no_file += 1 + + else: + self.cnt_file += 1 + + if self.n_dropout > 0: + total_outputs = torch.empty((0, len(uv_boxes))).to(self.device) + self.model.dropout.training = True + for ii in range(self.n_dropout): + outputs = self.model(inputs) + outputs = self.unnormalize_bi(outputs) + samples = self.laplace_sampling(outputs, self.n_samples) + total_outputs = torch.cat((total_outputs, samples), 0) + varss = total_outputs.std(0) + + else: + varss = [0]*len(uv_boxes) + + # Don't use dropout for the mean prediction and aleatoric uncertainty + self.model.dropout.training = False + outputs_net = self.model(inputs) + outputs = outputs_net.cpu().detach().numpy() + + path_txt = os.path.join(self.dir_out, basename + '.txt') + with open(path_txt, "w+") as ff: + for idx in range(outputs.shape[0]): + xx_1 = float(xy_centers[idx][0]) + yy_1 = float(xy_centers[idx][1]) + xy_kp = xy_kps[idx] + dd = float(outputs[idx][0]) + std_ale = math.exp(float(outputs[idx][1])) * dd + + zz = dd / math.sqrt(1 + xx_1**2 + yy_1**2) + xx_cam_0 = xx_1*zz + tt[0] # Still to verify the sign but negligible + yy_cam_0 = yy_1*zz + tt[1] + zz_cam_0 = zz + tt[2] + dd_cam_0 = math.sqrt(xx_cam_0**2 + yy_cam_0**2 + zz_cam_0**2) + + uv_box = uv_boxes[idx] + + twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3], + xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0, + std_ale, varss[idx], uv_box[4], dds_geom[idx]]] + + keypoints_str = ["%.5f" % vv for vv in xy_kp] + for item in twodecimals: + ff.write("%s " % item) + for item in keypoints_str: + ff.write("%s " % item) + ff.write("\n") + + # Save intrinsic matrix in the last row + kk_list = kk.reshape(-1,).tolist() + for kk_el in kk_list: + ff.write("%f " % kk_el) + ff.write("\n") + + # Print statistics + print("Saved in {} txt {} annotations. 
Not found {} images" + .format(self.cnt_file, self.cnt_ann, self.cnt_no_file)) + + + + + + + diff --git a/src/features/preprocess_ki.py b/src/features/preprocess_ki.py new file mode 100644 index 0000000..3dabf9d --- /dev/null +++ b/src/features/preprocess_ki.py @@ -0,0 +1,151 @@ + +import os +import glob +import math +import logging +from collections import defaultdict +import json +import datetime + + +class PreprocessKitti: + """Prepare arrays with same format as nuScenes preprocessing but using ground truth txt files""" + + def __init__(self, dir_ann, iou_thresh=0.3): + + self.dir_ann = dir_ann + self.iou_thresh = iou_thresh + self.dir_gt = os.path.join('data', 'baselines', 'gt') + self.names_gt = os.listdir(self.dir_gt) + self.dir_kk = os.path.join('data', 'baselines', 'calib') + self.list_gt = glob.glob(self.dir_gt + '/*.txt') + assert os.path.exists(self.dir_gt), "Ground truth dir does not exist" + assert os.path.exists(self.dir_ann), "Annotation dir does not exist" + + now = datetime.datetime.now() + now_time = now.strftime("%Y%m%d-%H%M")[2:] + dir_out = os.path.join('data', 'arrays') + self.path_joints = os.path.join(dir_out, 'joints-kitti-' + now_time + '.json') + self.path_names = os.path.join(dir_out, 'names-kitti-' + now_time + '.json') + path_train = os.path.join('data', 'baselines', 'train.txt') + path_val = os.path.join('data', 'baselines', 'val.txt') + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + from utils.kitti import get_calibration, check_conditions + self.get_calibration = get_calibration + self.check_conditions = check_conditions + + from utils.pifpaf import get_input_data, preprocess_pif + self.get_input_data = get_input_data + self.preprocess_pif = preprocess_pif + + from utils.misc import get_idx_max, append_cluster + self.get_idx_max = get_idx_max + self.append_cluster = append_cluster + + # self.clusters = ['all', '6', '10', '15', '20', '25', '30', '40', '50', '>50' + self.cnt_gt = 0 + self.cnt_fnf = 0 + self.dic_cnt = {'train': 0, 'val': 0, 'test': 0} + + # Split training and validation images + set_gt = set(self.names_gt) + set_train = set() + set_val = set() + + with open(path_train, "r") as f_train: + for line in f_train: + set_train.add(line[:-1] + '.txt') + with open(path_val, "r") as f_val: + for line in f_val: + set_val.add(line[:-1] + '.txt') + + self.set_train = set_gt.intersection(set_train) + self.set_val = set_gt.intersection(set_val) + assert self.set_train and self.set_val, "No validation or training annotations" + + self.dic_jo = {'train': dict(X=[], Y=[], names=[], kps=[], + clst=defaultdict(lambda: defaultdict(list))), + 'val': dict(X=[], Y=[], names=[], kps=[], + clst=defaultdict(lambda: defaultdict(list))), + 'test': dict(X=[], Y=[], names=[], kps=[], + clst=defaultdict(lambda: defaultdict(list)))} + + self.dic_names = defaultdict(lambda: defaultdict(list)) + + def run(self): + + for name in self.names_gt: + # Extract ground truth + if name == '004223.txt': + aa = 5 + path_gt = os.path.join(self.dir_gt, name) + basename, _ = os.path.splitext(name) + boxes_gt = [] + dds = [] + + if name in self.set_train: + phase = 'train' + elif name in self.set_val: + phase = 'val' + else: + self.cnt_fnf += 1 + continue + + # Iterate over each line of the gt file and save box location and distances + with open(path_gt, "r") as f_gt: + for line_gt in f_gt: + if self.check_conditions(line_gt, mode='gt'): + box = [float(x) for x in line_gt.split()[4:8]] + boxes_gt.append(box) + loc_gt = [float(x) for x in 
line_gt.split()[11:14]] + dd = math.sqrt(loc_gt[0] ** 2 + loc_gt[1] ** 2 + loc_gt[2] ** 2) + dds.append(dd) + self.dic_names[basename + '.png']['boxes'].append(box) + self.dic_names[basename + '.png']['dds'].append(dd) + self.cnt_gt += 1 + + # Extract keypoints + path_txt = os.path.join(self.dir_kk, basename + '.txt') + kk, tt = self.get_calibration(path_txt) + + # Find the annotations if exists + try: + with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f: + annotations = json.load(f) + boxes, keypoints = self.preprocess_pif(annotations) + (inputs, xy_kps), (uv_kps, uv_boxes, _, _) = self.get_input_data(boxes, keypoints, kk) + + except FileNotFoundError: + uv_boxes = [] + + # Match each set of keypoint with a ground truth + for ii, box in enumerate(uv_boxes): + idx_max, iou_max = self.get_idx_max(box, boxes_gt) + + if iou_max >= self.iou_thresh: + + self.dic_jo[phase]['kps'].append(uv_kps[ii]) + self.dic_jo[phase]['X'].append(inputs[ii]) + self.dic_jo[phase]['Y'].append([dds[idx_max]]) # Trick to make it (nn,1) + self.dic_jo[phase]['names'].append(name) # One image name for each annotation + self.append_cluster(self.dic_jo, phase, inputs[ii], dds[idx_max], uv_kps[ii]) + self.dic_cnt[phase] += 1 + boxes_gt.pop(idx_max) + dds.pop(idx_max) + + with open(self.path_joints, 'w') as f: + json.dump(self.dic_jo, f) + with open(os.path.join(self.path_names), 'w') as f: + json.dump(self.dic_names, f) + for phase in ['train', 'val', 'test']: + print("Saved {} annotations for phase {}" + .format(self.dic_cnt[phase], phase)) + print("Number of GT files: {}. Files not found: {}" + .format(self.cnt_gt, self.cnt_fnf)) + print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints)) + + + diff --git a/src/features/preprocess_nu.py b/src/features/preprocess_nu.py new file mode 100644 index 0000000..0a97ea5 --- /dev/null +++ b/src/features/preprocess_nu.py @@ -0,0 +1,186 @@ +import numpy as np +import os +import sys +import time +import json +import logging +from collections import defaultdict +import datetime + + +class PreprocessNuscenes: + """ + Preprocess Nuscenes dataset + """ + def __init__(self, dir_ann, dir_nuscenes, dataset, iou_min=0.3): + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + self.dir_ann = dir_ann + dir_out = os.path.join('data', 'arrays') + assert os.path.exists(dir_nuscenes), "Nuscenes directory does not exists" + assert os.path.exists(self.dir_ann), "The annotations directory does not exists" + assert os.path.exists(dir_out), "Joints directory does not exists" + + now = datetime.datetime.now() + now_time = now.strftime("%Y%m%d-%H%M")[2:] + self.path_joints = os.path.join(dir_out, 'joints-' + dataset + '-' + now_time + '.json') + self.path_names = os.path.join(dir_out, 'names-' + dataset + '-' + now_time + '.json') + + self.iou_min = iou_min + + # Import functions + from utils.misc import get_idx_max, append_cluster + self.get_idx_max = get_idx_max + self.append_cluster = append_cluster + from utils.nuscenes import select_categories + self.select_categories = select_categories + from utils.camera import project_3d + self.project_3d = project_3d + from utils.pifpaf import get_input_data, preprocess_pif + self.get_input_data = get_input_data + self.preprocess_pif = preprocess_pif + from nuscenes.nuscenes import NuScenes + from nuscenes.utils import splits + self.splits = splits + + # Initialize dicts to save joints for training + self.dic_jo = {'train': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[], 
+ clst=defaultdict(lambda: defaultdict(list))), + 'val': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[], + clst=defaultdict(lambda: defaultdict(list))), + 'test': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[], + clst=defaultdict(lambda: defaultdict(list))) + } + # Names as keys to retrieve it easily + self.dic_names = defaultdict(lambda: defaultdict(list)) + + self.cameras = ['CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT'] + + # Split training and validation base on the dataset type + if dataset == 'nuscenes': + self.nusc = NuScenes(version='v1.0-trainval', dataroot=dir_nuscenes, verbose=True) + self.scenes = self.nusc.scene + split_scenes = self.splits.create_splits_scenes() + self.split_train, self.split_val = split_scenes['train'], split_scenes['val'] + + elif dataset == 'nuscenes_mini': + self.nusc = NuScenes(version='v1.0-mini', dataroot=dir_nuscenes, verbose=True) + self.scenes = self.nusc.scene + split_scenes = self.splits.create_splits_scenes() + self.split_train, self.split_val = split_scenes['train'], split_scenes['val'] + + elif dataset == 'nuscenes_teaser': + self.nusc = NuScenes(version='v1.0-trainval', dataroot=dir_nuscenes, verbose=True) + with open("data/baselines/teaser_scenes.txt", "r") as ff: + teaser_scenes = ff.read().splitlines() + self.scenes = self.nusc.scene + self.scenes = [scene for scene in self.scenes if scene['token'] in teaser_scenes] + with open("data/baselines/split_teaser_scenes.json", "r") as ff: + dic_split = json.load(ff) + self.split_train = [scene['name'] for scene in self.scenes if scene['token'] in dic_split['train']] + self.split_val = [scene['name'] for scene in self.scenes if scene['token'] in dic_split['val']] + + def run(self): + """ + Prepare arrays for training + """ + cnt_scenes = 0 + cnt_samples = 0 + cnt_sd = 0 + cnt_ann = 0 + start = time.time() + + for ii, scene in enumerate(self.scenes): + end_scene = time.time() + current_token = scene['first_sample_token'] + cnt_scenes += 1 + if ii == 0: + time_left = "Nan" + else: + time_left = str((end_scene-start_scene)/60 * (len(self.scenes) - ii))[:4] + + sys.stdout.write('\r' + 'Elaborating scene {}, remaining time {} minutes'.format(cnt_scenes, time_left) + '\t\n') + start_scene = time.time() + if scene['name'] in self.split_train: + phase = 'train' + elif scene['name'] in self.split_val: + phase = 'val' + else: + print("phase name not in training or validation split") + continue + + while not current_token == "": + sample_dic = self.nusc.get('sample', current_token) + cnt_samples += 1 + + # Extract all the sample_data tokens for each sample + for cam in self.cameras: + sd_token = sample_dic['data'][cam] + cnt_sd += 1 + path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1) # At least one corner + + # Extract all the annotations of the person + boxes_gt = [] + dds = [] + boxes_3d = [] + name = os.path.basename(path_im) + for box_obj in boxes_obj: + if box_obj.name[:6] != 'animal': + general_name = box_obj.name.split('.')[0] + '.' 
+ box_obj.name.split('.')[1] + else: + general_name = 'animal' + if general_name in self.select_categories('all'): + box = self.project_3d(box_obj, kk) + dd = np.linalg.norm(box_obj.center) + boxes_gt.append(box) + dds.append(dd) + box_3d = box_obj.center.tolist() + box_obj.wlh.tolist() + boxes_3d.append(box_3d) + self.dic_names[name]['boxes'].append(box) + self.dic_names[name]['dds'].append(dd) + + # Run IoU with pifpaf detections and save + path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json') + exists = os.path.isfile(path_pif) + + if exists: + with open(path_pif, 'r') as f: + annotations = json.load(f) + + boxes, keypoints = self.preprocess_pif(annotations, im_size=None) + (inputs, xy_kps), (uv_kps, uv_boxes, _, _) = self.get_input_data(boxes, keypoints, kk) + + for ii, box in enumerate(uv_boxes): + idx_max, iou_max = self.get_idx_max(box, boxes_gt) + + if iou_max > self.iou_min: + try: + self.dic_jo[phase]['kps'].append(uv_kps[ii]) + except TypeError: + aa = 4 + self.dic_jo[phase]['X'].append(inputs[ii]) + self.dic_jo[phase]['Y'].append([dds[idx_max]]) # Trick to make it (nn,1) + self.dic_jo[phase]['names'].append(name) # One image name for each annotation + self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_max]) + self.dic_jo[phase]['K'].append(kk.tolist()) + self.append_cluster(self.dic_jo, phase, inputs[ii], dds[idx_max], uv_kps[ii]) + boxes_gt.pop(idx_max) + dds.pop(idx_max) + boxes_3d.pop(idx_max) + cnt_ann += 1 + sys.stdout.write('\r' + 'Saved annotations {}' + .format(cnt_ann) + '\t') + + current_token = sample_dic['next'] + + with open(os.path.join(self.path_joints), 'w') as f: + json.dump(self.dic_jo, f) + with open(os.path.join(self.path_names), 'w') as f: + json.dump(self.dic_names, f) + end = time.time() + + print("\nSaved {} annotations for {} samples in {} scenes. 
Total time: {:.1f} minutes" + .format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60)) + print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints)) diff --git a/src/features/resize_images.py b/src/features/resize_images.py new file mode 100644 index 0000000..353b4c6 --- /dev/null +++ b/src/features/resize_images.py @@ -0,0 +1,37 @@ + +import glob +import logging +import os +import cv2 +import sys + + +def resize(input_glob, output_dir, factor=2): + """ + Resize images using multiplicative factor + """ + list_im = glob.glob(input_glob) + + + for idx, path_in in enumerate(list_im): + + basename, _ = os.path.splitext(os.path.basename(path_in)) + im = cv2.imread(path_in) + assert im is not None, "Image not found" + + # Paddle the image if requested and resized the dataset to a fixed dataset + h_im = im.shape[0] + w_im = im.shape[1] + w_new = round(factor * w_im) + h_new = round(factor * h_im) + + print("resizing image {} to: {} x {}".format(basename, w_new, h_new)) + im_new = cv2.resize(im, (w_new, h_new)) + + # Save the image + name_im = basename + '.png' + path_out = os.path.join(output_dir, name_im) + cv2.imwrite(path_out, im_new) + sys.stdout.write('\r' + 'Saving image number: {}'.format(idx) + '\t') + + diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..173fa1b --- /dev/null +++ b/src/main.py @@ -0,0 +1,166 @@ + +import argparse +import os +import sys + +sys.path.insert(0, os.path.join('.', 'features')) +sys.path.insert(0, os.path.join('.', 'models')) +from openpifpaf.network import nets +from openpifpaf import decoder +from features.preprocess_nu import PreprocessNuscenes +from features.preprocess_ki import PreprocessKitti +from predict.predict_2d_3d import predict +from features.trial import trials +from models.trainer import Trainer +from eval.run_kitti import RunKitti +from eval.geom_baseline import GeomBaseline +from models.hyp_tuning import HypTuning +from eval.kitti_eval import KittiEval + + +def cli(): + + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + # Subparser definition + subparsers = parser.add_subparsers(help='Different parsers for main actions', dest='command') + predict_parser = subparsers.add_parser("predict") + prep_parser = subparsers.add_parser("prep") + training_parser = subparsers.add_parser("train") + eval_parser = subparsers.add_parser("eval") + + # Preprocess input data + prep_parser.add_argument('--dataset', + help='datasets to preprocess: nuscenes, nuscenes_teaser, nuscenes_mini, kitti', + default='nuscenes') + prep_parser.add_argument('--dir_ann', help='directory of annotations of 2d joints', + default='/data/lorenzo-data/nuscenes_new/annotations/scale_1_april_new') + prep_parser.add_argument('--dir_nuscenes', help='directory of nuscenes devkit', + default='/data/lorenzo-data/nuscenes_new/') + + # Predict (2D pose and/or 3D location from images) + # 0) General arguments + predict_parser.add_argument('--networks', nargs='+', help='Run pifpaf and/or monoloco', default=['monoloco']) + predict_parser.add_argument('images', nargs='*', help='input images') + predict_parser.add_argument('--glob', help='glob expression for input images (for many images)') + predict_parser.add_argument('-o', '--output-directory', help='Output directory') + predict_parser.add_argument('--output_types', nargs='+', default=['json'], + help='what to output: json keypoints skeleton for Pifpaf' + 'json bird front combined for Monoloco') + predict_parser.add_argument('--show', 
help='to show images', action='store_true') + + # 1)Pifpaf arguments + nets.cli(predict_parser) + decoder.cli(predict_parser, force_complete_pose=True, instance_threshold=0.1) + predict_parser.add_argument('--model_pifpaf', help='pifpaf model to load', + default="data/models/resnet152-190412.pkl") + predict_parser.add_argument('--scale', default=1.0, type=float, help='change the scale of the image to preprocess') + + # 2) Monoloco argument + predict_parser.add_argument('--model', help='path of MonoLoco model to load', + default="data/models/best_model__seed_2_.pickle") + predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization', + default='data/arrays/names-kitti-000.json') + predict_parser.add_argument('--dir_calib', default='data/baselines/calib/', help='directory of calib_files') + predict_parser.add_argument('--transform', help='transformation for the pose', default='None') + predict_parser.add_argument('--draw_kps', help='to draw kps in the images', action='store_true') + predict_parser.add_argument('--predict', help='whether to make prediction', action='store_true') + predict_parser.add_argument('--no_calib', help='use standard intrinsic matrix', action='store_true') + predict_parser.add_argument('--z_max', type=int, help='', default=22) + predict_parser.add_argument('--y_scale', type=float, help='', default=1) + predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0) + predict_parser.add_argument('--combined', help='to print combined images', action='store_true') + + # Training + training_parser.add_argument('--joints', help='Json file with input joints', + default='data/arrays/joints-nuscenes-190507-0852.json') + training_parser.add_argument('--save', help='whether to save model and log file', action='store_false') + training_parser.add_argument('--hyp', help='run hyperparameters tuning', action='store_true') + training_parser.add_argument('-e', '--epochs', type=int, help='number of epochs to train for', default=150) + training_parser.add_argument('--bs', type=int, default=256, help='input batch size') + training_parser.add_argument('--baseline', help='whether to train using the baseline', action='store_true') + training_parser.add_argument('--dropout', type=float, help='dropout. 
Default no dropout', default=0.2) + training_parser.add_argument('--lr', type=float, help='learning rate', default=0.002) + training_parser.add_argument('--sched_step', type=float, help='scheduler step time (epochs)', default=20) + training_parser.add_argument('--sched_gamma', type=float, help='Scheduler multiplication every step', default=0.9) + training_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256) + training_parser.add_argument('--n_stage', type=int, help='Number of stages in the model', default=3) + training_parser.add_argument('--multiplier', type=int, help='Size of the grid of hyp search', default=1) + training_parser.add_argument('--r_seed', type=int, help='specify the seed for training and hyp tuning', default=1) + + # Evaluation + eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti') + eval_parser.add_argument('--geometric', help='to evaluate geometric distance', action='store_true') + eval_parser.add_argument('--dir_ann', help='directory of annotations of 2d joints', + default='/data/lorenzo-data/kitti/annotations/pif_factor_2_april') + eval_parser.add_argument('--model', help='path of MonoLoco model to load', + default="data/models/monoloco-190507-0943.pkl") + eval_parser.add_argument('--joints', help='Json file with input joints to evaluate (for nuscenes)', + default='data/arrays/joints-nuscenes-190508-1102.json') + eval_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0) + eval_parser.add_argument('--run_kitti', help='create txt files for validation of kitti', action='store_true') + eval_parser.add_argument('--dropout', type=float, help='dropout. Default no dropout', default=0.2) + eval_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256) + eval_parser.add_argument('--n_stage', type=int, help='Number of stages in the model', default=3) + eval_parser.add_argument('--show', help='whether to show eval statistics', action='store_true') + eval_parser.add_argument('--trial', help='just to test', action='store_true') + + args = parser.parse_args() + return args + + +def main(): + args = cli() + + if args.command == 'predict': + _ = predict(args) + + elif args.command == 'prep': + if 'nuscenes' in args.dataset: + prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset) + prep.run() + if 'kitti' in args.dataset: + prep = PreprocessKitti(args.dir_ann) + prep.run() + + elif args.command == 'train': + if args.hyp: + hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs, + baseline=args.baseline, dropout=args.dropout, + multiplier=args.multiplier, r_seed=args.r_seed) + hyp_tuning.train() + else: + training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs, + baseline=args.baseline, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step, + n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size, + r_seed=args.r_seed, save=args.save) + + _ = training.train() + _ = training.evaluate() + + elif args.command == 'eval': + if args.geometric: + geometric_baseline = GeomBaseline(args.joints) + geometric_baseline.run() + + if args.run_kitti: + run_kitti = RunKitti(model=args.model, dir_ann=args.dir_ann, + dropout=args.dropout, hidden_size=args.hidden_size, n_stage=args.n_stage, + n_dropout=args.n_dropout) + run_kitti.run() + + if args.dataset == 'kitti': + kitti_eval = KittiEval(show=args.show) + kitti_eval.run() + + if 'nuscenes' in 
args.dataset: + training = Trainer(joints=args.joints) + _ = training.evaluate(load=True, model=args.model, debug=False) + + else: + raise ValueError("Main subparser not recognized or not provided") + + +if __name__ == '__main__': + main() diff --git a/src/models/architectures.py b/src/models/architectures.py new file mode 100644 index 0000000..e8a7115 --- /dev/null +++ b/src/models/architectures.py @@ -0,0 +1,120 @@ + +import numpy as np +import torch +import torch.nn as nn + + +class TriLinear(nn.Module): + """ + As Bilinear but without skip connection + """ + def __init__(self, input_size, output_size, p_dropout, linear_size=1024): + super(TriLinear, self).__init__() + + self.input_size = input_size + self.output_size = output_size + self.l_size = linear_size + + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(p_dropout) + + self.w1 = nn.Linear(self.input_size, self.l_size) + self.batch_norm1 = nn.BatchNorm1d(self.l_size) + + self.w2 = nn.Linear(self.l_size, self.l_size) + self.batch_norm2 = nn.BatchNorm1d(self.l_size) + + self.w3 = nn.Linear(self.l_size, self.output_size) + + def forward(self, x): + y = self.w1(x) + y = self.batch_norm1(y) + y = self.relu(y) + y = self.dropout(y) + + y = self.w2(y) + y = self.batch_norm2(y) + y = self.relu(y) + y = self.dropout(y) + + y = self.w3(y) + + return y + +def weight_init(m): + """TO initialize weights using kaiming initialization""" + if isinstance(m, nn.Linear): + nn.init.kaiming_normal_(m.weight) + + +class Linear(nn.Module): + def __init__(self, linear_size, p_dropout=0.5): + super(Linear, self).__init__() + self.l_size = linear_size + + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(p_dropout) + + self.w1 = nn.Linear(self.l_size, self.l_size) + self.batch_norm1 = nn.BatchNorm1d(self.l_size) + + self.w2 = nn.Linear(self.l_size, self.l_size) + self.batch_norm2 = nn.BatchNorm1d(self.l_size) + + def forward(self, x): + y = self.w1(x) + y = self.batch_norm1(y) + y = self.relu(y) + y = self.dropout(y) + + y = self.w2(y) + y = self.batch_norm2(y) + y = self.relu(y) + y = self.dropout(y) + + out = x + y + + return out + + +class LinearModel(nn.Module): + + """Class from Simple yet effective baseline""" + def __init__(self, input_size, output_size, linear_size=256, p_dropout=0.2, num_stage=3): + super(LinearModel, self).__init__() + + self.input_size = input_size + self.output_size = output_size + self.linear_size = linear_size + self.p_dropout = p_dropout + self.num_stage = num_stage + + # process input to linear size + self.w1 = nn.Linear(self.input_size, self.linear_size) + self.batch_norm1 = nn.BatchNorm1d(self.linear_size) + + self.linear_stages = [] + for l in range(num_stage): + self.linear_stages.append(Linear(self.linear_size, self.p_dropout)) + self.linear_stages = nn.ModuleList(self.linear_stages) + + # post processing + self.w2 = nn.Linear(self.linear_size, self.output_size) + + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(self.p_dropout) + + def forward(self, x): + # pre-processing + y = self.w1(x) + y = self.batch_norm1(y) + y = self.relu(y) + y = self.dropout(y) + + # linear layers + for i in range(self.num_stage): + y = self.linear_stages[i](y) + + y = self.w2(y) + + return y \ No newline at end of file diff --git a/src/models/datasets.py b/src/models/datasets.py new file mode 100644 index 0000000..ee13ebb --- /dev/null +++ b/src/models/datasets.py @@ -0,0 +1,66 @@ + +import json +import numpy as np +import torch +import os +from collections import defaultdict + +from torch.utils.data 
import Dataset + + +class NuScenesDataset(Dataset): + """ + Get mask joints or ground truth joints and transform into tensors + """ + + def __init__(self, joints, phase): + """ + Load inputs and outputs from the pickles files from gt joints, mask joints or both + """ + assert(phase in ['train', 'val', 'test']) + + with open(joints, 'r') as f: + dic_jo = json.load(f) + + # Define input and output for normal training and inference + self.inputs = np.array(dic_jo[phase]['X']) + self.outputs = np.array(dic_jo[phase]['Y']).reshape(-1, 1) + self.names = dic_jo[phase]['names'] + self.kps = np.array(dic_jo[phase]['kps']) + + # Extract annotations divided in clusters + self.dic_clst = dic_jo[phase]['clst'] + + def __len__(self): + """ + :return: number of samples (m) + """ + return self.inputs.shape[0] + + def __getitem__(self, idx): + """ + Reading the tensors when required. E.g. Retrieving one element or one batch at a time + :param idx: corresponding to m + """ + inputs = torch.from_numpy(self.inputs[idx, :]).float() + outputs = torch.from_numpy(np.array(self.outputs[idx])).float() + names = self.names[idx] + kps = self.kps[idx, :] + + return inputs, outputs, names, kps + + def get_cluster_annotations(self, clst): + """Return normalized annotations corresponding to a certain cluster + """ + inputs = torch.from_numpy(np.array(self.dic_clst[clst]['X'])).float() + outputs = torch.from_numpy(np.array(self.dic_clst[clst]['Y'])).float() + count = len(self.dic_clst[clst]['Y']) + + return inputs, outputs, count + + + + + + + diff --git a/src/models/hyp_tuning.py b/src/models/hyp_tuning.py new file mode 100644 index 0000000..90e1750 --- /dev/null +++ b/src/models/hyp_tuning.py @@ -0,0 +1,130 @@ +import math +import os +import json +import time +import logging +import torch +import random +import datetime +import numpy as np +from models.trainer import Trainer + + +class HypTuning: + + def __init__(self, joints, epochs, baseline, dropout, multiplier=1, r_seed=1): + """ + Initialize directories, load the data and parameters for the training + """ + + # Initialize Directories + self.joints = joints + self.baseline = baseline + self.dropout = dropout + self.num_epochs = epochs + self.baseline = baseline + self.r_seed = r_seed + dir_out = os.path.join('data', 'models') + dir_logs = os.path.join('data', 'logs') + assert os.path.exists(dir_out), "Output directory not found" + if not os.path.exists(dir_logs): + os.makedirs(dir_logs) + + now = datetime.datetime.now() + now_time = now.strftime("%Y%m%d-%H%M")[2:] + + if baseline: + name_out = 'hyp-baseline-' + now_time + else: + name_out = 'hyp-monoloco-' + now_time + + self.path_log = os.path.join(dir_logs, name_out + now_time) + self.path_model = os.path.join(dir_out, name_out + now_time + '.pkl') + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + # Initialize grid of parameters + random.seed(r_seed) + np.random.seed(r_seed) + self.sched_gamma_list = [0.8, 0.9, 1, 0.8, 0.9, 1] * multiplier + random.shuffle(self.sched_gamma_list) + self.sched_step = [10, 20, 30, 40, 50, 60] * multiplier + random.shuffle(self.sched_step) + self.bs_list = [64, 128, 256, 512, 1024, 2048] * multiplier + random.shuffle(self.bs_list) + self.hidden_list = [128, 256, 512, 128, 256, 512] * multiplier + random.shuffle(self.hidden_list) + self.n_stage_list = [3, 3, 3, 3, 3, 3] * multiplier + random.shuffle(self.n_stage_list) + # Learning rate + aa = math.log(0.001, 10) + bb = math.log(0.03, 10) + log_lr_list = np.random.uniform(aa, bb, int(6 * 
multiplier)).tolist() + self.lr_list = [10 ** xx for xx in log_lr_list] + # plt.hist(self.lr_list, bins=50) + # plt.show() + + def train(self): + """Train multiple times using log-space random search""" + + best_acc_val = 20 + dic_best = {} + dic_err_best = {} + start = time.time() + cnt = 0 + for idx, lr in enumerate(self.lr_list): + bs = self.bs_list[idx] + sched_gamma = self.sched_gamma_list[idx] + sched_step = self.sched_step[idx] + hidden_size = self.hidden_list[idx] + n_stage = self.n_stage_list[idx] + + training = Trainer(joints=self.joints, epochs=self.num_epochs, + bs=bs, baseline=self.baseline, dropout=self.dropout, lr=lr, sched_step=sched_step, + sched_gamma=sched_gamma, hidden_size=hidden_size, n_stage=n_stage, + save=False, print_loss=False, r_seed=self.r_seed) + + best_epoch = training.train() + dic_err, model = training.evaluate() + acc_val = dic_err['val']['all']['mean'] + cnt += 1 + print("Combination number: {}".format(cnt)) + + if acc_val < best_acc_val: + dic_best['lr'] = lr + dic_best['joints'] = self.joints + dic_best['bs'] = bs + dic_best['baseline'] = self.baseline + dic_best['sched_gamma'] = sched_gamma + dic_best['sched_step'] = sched_step + dic_best['hidden_size'] = hidden_size + dic_best['n_stage'] = n_stage + dic_best['acc_val'] = dic_err['val']['all']['mean'] + dic_best['best_epoch'] = best_epoch + dic_best['random_seed'] = self.r_seed + # dic_best['acc_test'] = dic_err['test']['all']['mean'] + + dic_err_best = dic_err + best_acc_val = acc_val + model_best = model + torch.save(model_best.state_dict(), self.path_model) + + with open(self.path_log, 'w') as f: + json.dump(dic_best, f) + + end = time.time() + print('\n\n\n') + self.logger.info(" Tried {} combinations".format(cnt)) + self.logger.info(" Total time for hyperparameters search: {:.2f} minutes".format((end - start) / 60)) + self.logger.info(" Best hyperparameters are:") + for key, value in dic_best.items(): + self.logger.info(" {}: {}".format(key, value)) + + print() + self.logger.info("Accuracy in each cluster:") + + for key in dic_err_best['val']: + self.logger.info("Val: error in cluster {} = {} ".format(key, dic_err_best['val'][key]['mean'])) + self.logger.info("Final accuracy Val: {:.2f}".format(dic_best['acc_val'])) + self.logger.info("\nSaved the model: {}".format(self.path_model)) diff --git a/src/models/losses.py b/src/models/losses.py new file mode 100644 index 0000000..4572ada --- /dev/null +++ b/src/models/losses.py @@ -0,0 +1,144 @@ +import math +import torch +import numpy as np +import matplotlib.pyplot as plt + + +class CustomL1Loss(torch.nn.Module): + """ + L1 loss with more weight to errors at a shorter distance + It inherits from nn.module so it supports backward + """ + + def __init__(self, dic_norm, device, beta=1): + super(CustomL1Loss, self).__init__() + + self.dic_norm = dic_norm + self.device = device + self.beta = beta + + @staticmethod + def compute_weights(xx, beta=1): + """ + Return the appropriate weight depending on the distance and the hyperparameter chosen + alpha = 1 refers to the curve of A Photogrammetric Approach for Real-time... + It is made for unnormalized outputs (to be more understandable) + From 70 meters on every value is weighted the same (0.1**beta) + Alpha is optional value from Focal loss. 
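+ (In the code below alpha is fixed to 1, so only the distance-dependent term ww = np.maximum(0.1, 1 - xx / 78)**beta matters: with beta=1 a target at 10 m gets a weight of about 0.87, one at 60 m about 0.23, and anything beyond ~70 m the floor value 0.1**beta.)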
Yet to be analyzed + """ + # alpha = np.maximum(1, 10 ** (beta - 1)) + alpha = 1 + ww = np.maximum(0.1, 1 - xx / 78)**beta + + return alpha * ww + + def print_loss(self): + xx = np.linspace(0, 80, 100) + y1 = self.compute_weights(xx, beta=1) + y2 = self.compute_weights(xx, beta=2) + y3 = self.compute_weights(xx, beta=3) + plt.plot(xx, y1) + plt.plot(xx, y2) + plt.plot(xx, y3) + plt.xlabel("Distance [m]") + plt.ylabel("Loss function Weight") + plt.legend(("Beta = 1", "Beta = 2", "Beta = 3")) + plt.show() + + def forward(self, output, target): + + unnormalized_output = output.cpu().detach().numpy() * self.dic_norm['std']['Y'] + self.dic_norm['mean']['Y'] + weights_np = self.compute_weights(unnormalized_output, self.beta) + weights = torch.from_numpy(weights_np).float().to(self.device) # To make weights in the same cuda device + losses = torch.abs(output - target) * weights + loss = losses.mean() # Mean over the batch + # self.print_loss() + + return loss + + +class LaplacianLoss(torch.nn.Module): + """1D Gaussian with std depending on the absolute distance + """ + def __init__(self, size_average=True, reduce=True, evaluate=False): + super(LaplacianLoss, self).__init__() + self.size_average = size_average + self.reduce = reduce + self.evaluate = evaluate + + def laplacian_1d(self, mu_si, xx): + """ + 1D Gaussian Loss. f(x | mu, sigma). The network outputs mu and sigma. X is the ground truth distance. + This supports backward(). + Inspired by + https://github.com/naba89/RNN-Handwriting-Generation-Pytorch/blob/master/loss_functions.py + + """ + mu, si = mu_si[:, 0:1], mu_si[:, 1:2] + # norm = xx - mu + norm = 1 - mu / xx # Relative + + term_a = torch.abs(norm) * torch.exp(-si) + term_b = si + norm_bi = (np.mean(np.abs(norm.cpu().detach().numpy())), np.mean(torch.exp(si).cpu().detach().numpy())) + + if self.evaluate: + return norm_bi + else: + return term_a + term_b + + def forward(self, outputs, targets): + + values = self.laplacian_1d(outputs, targets) + + if not self.reduce or self.evaluate: + return values + if self.size_average: + mean_values = torch.mean(values) + return mean_values + return torch.sum(values) + + +class GaussianLoss(torch.nn.Module): + """1D Gaussian with std depending on the absolute distance + """ + def __init__(self, device, size_average=True, reduce=True, evaluate=False): + super(GaussianLoss, self).__init__() + self.size_average = size_average + self.reduce = reduce + self.evaluate = evaluate + self.device = device + + def gaussian_1d(self, mu_si, xx): + """ + 1D Gaussian Loss. f(x | mu, sigma). The network outputs mu and sigma. X is the ground truth distance. + This supports backward(). 
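+ In closed form the per-sample value below is ((xx - mu) / si)**2 / 2 + log(si * sqrt(2 * pi)), i.e. the exact negative log-likelihood of a 1D Gaussian, with si clamped to a minimum of 0.1 for numerical stability; the Laplacian counterpart above instead uses the relative error |1 - mu/xx| * exp(-si) + si.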
+ Inspired by + https://github.com/naba89/RNN-Handwriting-Generation-Pytorch/blob/master/loss_functions.py + + """ + mu, si = mu_si[:, 0:1], mu_si[:, 1:2] + + min_si = torch.ones(si.size()).cuda(self.device) * 0.1 + si = torch.max(min_si, si) + norm = xx - mu + term_a = (norm / si)**2 / 2 + term_b = torch.log(si * math.sqrt(2 * math.pi)) + + norm_si = (np.mean(np.abs(norm.cpu().detach().numpy())), np.mean(si.cpu().detach().numpy())) + + if self.evaluate: + return norm_si + else: + return term_a + term_b + + def forward(self, outputs, targets): + + values = self.gaussian_1d(outputs, targets) + + if not self.reduce or self.evaluate: + return values + if self.size_average: + mean_values = torch.mean(values) + return mean_values + return torch.sum(values) diff --git a/src/models/trainer.py b/src/models/trainer.py new file mode 100644 index 0000000..a4282fa --- /dev/null +++ b/src/models/trainer.py @@ -0,0 +1,358 @@ +import torch +import torch.nn as nn +import copy +import numpy as np +import matplotlib.pyplot as plt +import os +import datetime +import logging +from collections import defaultdict +import json +import sys +import time + +from torch.utils.data import DataLoader +from torch.optim import lr_scheduler + +from models.datasets import NuScenesDataset +from models.architectures import LinearModel +from utils.misc import set_logger +from models.losses import LaplacianLoss + + +class Trainer: + """ + Training and evaluation of a neural network which predicts 3D localization and confidence intervals + given 2d joints + """ + + def __init__(self, joints, epochs=100, bs=256, dropout=0.2, lr=0.002, + sched_step=20, sched_gamma=1, hidden_size=256, n_stage=3, r_seed=1, n_dropout=0, n_samples=100, + baseline=False, save=False, print_loss=False): + """ + Initialize directories, load the data and parameters for the training + """ + + # Initialize directories and parameters + dir_out = os.path.join('data', 'models') + assert os.path.exists(dir_out), "Output directory not found" + dir_logs = os.path.join('data', 'logs') + if not os.path.exists(dir_logs): + os.makedirs(dir_logs) + assert os.path.exists(joints), "Input file not found" + + self.joints = joints + self.num_epochs = epochs + self.save = save + self.print_loss = print_loss + self.baseline = baseline + self.lr = lr + self.sched_step = sched_step + self.sched_gamma = sched_gamma + n_joints = 17 + input_size = n_joints * 2 + self.output_size = 2 + self.clusters = ['10', '20', '30', '>30'] + self.hidden_size = hidden_size + self.n_stage = n_stage + self.dir_out = dir_out + self.n_dropout = n_dropout + self.n_samples = n_samples + self.r_seed = r_seed + + from utils.normalize import unnormalize_bi + self.unnormalize_bi = unnormalize_bi + from utils.misc import epistemic_variance, laplace_sampling + self.epistemic_variance = epistemic_variance + self.laplace_sampling = laplace_sampling + + # Loss functions and output names + now = datetime.datetime.now() + now_time = now.strftime("%Y%m%d-%H%M")[2:] + + if baseline: + name_out = 'baseline-' + now_time + self.criterion = nn.L1Loss().cuda() + self.output_size = 1 + else: + name_out = 'monoloco-' + now_time + self.criterion = LaplacianLoss().cuda() + self.output_size = 2 + self.criterion_eval = nn.L1Loss().cuda() + + if self.save: + self.path_model = os.path.join(dir_out, name_out + '.pkl') + self.logger = set_logger(os.path.join(dir_logs, name_out)) + self.logger.info("Training arguments: \nepochs: {} \nbatch_size: {} \ndropout: {}" + "\nbaseline: {} \nlearning rate: {} \nscheduler step: {} 
\nscheduler gamma: {} " + "\ninput_size: {} \nhidden_size: {} \nn_stages: {} \nr_seed: {}" + "\ninput_file: {}" + .format(epochs, bs, dropout, baseline, lr, sched_step, sched_gamma, input_size, + hidden_size, n_stage, r_seed, self.joints)) + else: + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + + # Select the device and load the data + use_cuda = torch.cuda.is_available() + self.device = torch.device("cuda:0" if use_cuda else "cpu") + print('Device: ', self.device) + + # Set the seed for random initialization + torch.manual_seed(r_seed) + if use_cuda: + torch.cuda.manual_seed(r_seed) + + # Dataloader + self.dataloaders = {phase: DataLoader(NuScenesDataset(self.joints, phase=phase), + batch_size=bs, shuffle=True) for phase in ['train', 'val']} + + self.dataset_sizes = {phase: len(NuScenesDataset(self.joints, phase=phase)) + for phase in ['train', 'val', 'test']} + + # Define the model + self.logger.info('Sizes of the dataset: {}'.format(self.dataset_sizes)) + print(">>> creating model") + self.model = LinearModel(input_size=input_size, output_size=self.output_size, linear_size=hidden_size, + p_dropout=dropout, num_stage=self.n_stage) + self.model.to(self.device) + print(">>> total params: {:.2f}M".format(sum(p.numel() for p in self.model.parameters()) / 1000000.0)) + + # Optimizer and scheduler + self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=lr) + self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=self.sched_step, gamma=self.sched_gamma) + + def train(self): + + # Initialize the variable containing model weights + since = time.time() + best_model_wts = copy.deepcopy(self.model.state_dict()) + best_acc = 1e6 + best_epoch = 0 + epoch_losses_tr = [] + epoch_losses_val = [] + epoch_norms = [] + epoch_sis = [] + + for epoch in range(self.num_epochs): + + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + self.scheduler.step() + self.model.train() # Set model to training mode + else: + self.model.eval() # Set model to evaluate mode + + running_loss_tr = 0.0 + running_loss_eval = 0.0 + norm_tr = 0.0 + bi_tr = 0.0 + + # Iterate over data. 
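+ # Each batch from NuScenesDataset yields (inputs, labels, names, kps): inputs are the 34 normalized keypoint coordinates (17 joints x 2), labels the ground-truth distance in meters.
+ # The Laplacian loss (plain L1 for the baseline) drives the optimizer, while a separate L1 loss on the mu column only is accumulated as the evaluation metric in meters.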
+ for inputs, labels, _, _ in self.dataloaders[phase]: + inputs = inputs.to(self.device) + labels = labels.to(self.device) + + # zero the parameter gradients + self.optimizer.zero_grad() + + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = self.model(inputs) + + if self.output_size == 2: + outputs_eval = outputs[:, 0:1] # Fundamental to put slices + else: + outputs_eval = outputs + + loss = self.criterion(outputs, labels) + loss_eval = self.criterion_eval(outputs_eval, labels) # L1 loss to evaluation + + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + self.optimizer.step() + + # statistics + running_loss_tr += loss.item() * inputs.size(0) + running_loss_eval += loss_eval.item() * inputs.size(0) + + epoch_loss = running_loss_tr / self.dataset_sizes[phase] + epoch_acc = running_loss_eval / self.dataset_sizes[phase] # Average distance in meters + epoch_norm = float(norm_tr / self.dataset_sizes[phase]) + epoch_si = float(bi_tr / self.dataset_sizes[phase]) + if phase == 'train': + epoch_losses_tr.append(epoch_loss) + epoch_norms.append(epoch_norm) + epoch_sis.append(epoch_si) + else: + epoch_losses_val.append(epoch_acc) + + if epoch % 5 == 1: + sys.stdout.write('\r' + 'Epoch: {:.0f} Training Loss: {:.3f} Val Loss {:.3f}' + .format(epoch, epoch_losses_tr[-1], epoch_losses_val[-1]) + '\t') + + # deep copy the model + if phase == 'val' and epoch_acc < best_acc: + best_acc = epoch_acc + best_epoch = epoch + best_model_wts = copy.deepcopy(self.model.state_dict()) + + time_elapsed = time.time() - since + self.logger.info('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60)) + self.logger.info('Best val Acc: {:3f}'.format(best_acc)) + self.logger.info('Saved weights of the model at epoch: {}\n'.format(best_epoch)) + + if self.print_loss: + epoch_losses_val_scaled = [x - 4 for x in epoch_losses_val] # to compare with L1 Loss + plt.plot(epoch_losses_tr[10:], label='Training Loss') + plt.plot(epoch_losses_val_scaled[10:], label='Validation Loss') + plt.legend() + plt.show() + + # load best model weights + self.model.load_state_dict(best_model_wts) + + return best_epoch + + def evaluate(self, load=False, model=None, debug=False): + + # To load a model instead of using the trained one + if load: + # self.path_out = os.path.join(self.dir_out, 'best_model_paper.pickle') + self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage)) + + # Average distance on training and test set after unnormalizing + self.model.eval() + dic_err = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0))) # initialized to zero + phase = 'val' + dataloader_eval = DataLoader(NuScenesDataset(self.joints, phase=phase), + batch_size=5000, shuffle=True) + + size_eval = len(NuScenesDataset(self.joints, phase=phase)) + + with torch.no_grad(): + for inputs, labels, _, _ in dataloader_eval: + inputs = inputs.to(self.device) + labels = labels.to(self.device) + + # Debug plot for input-output distributions + if debug: + inputs_shoulder = inputs.cpu().numpy()[:, 5] + inputs_hip = inputs.cpu().numpy()[:, 11] + labels = labels.cpu().numpy() + heights = inputs_hip - inputs_shoulder + plt.figure(1) + plt.hist(heights, bins='auto') + plt.show() + plt.figure(2) + plt.hist(labels, bins='auto') + plt.show() + exit() + + # Manually reactivate dropout in eval + self.model.dropout.training = True + total_outputs = torch.empty((0, len(labels))).to(self.device) + + if self.n_dropout > 0: 
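+ # Monte-Carlo dropout for epistemic uncertainty: keep dropout active at inference, run n_dropout stochastic forward passes,
+ # sample n_samples values from each predicted Laplace distribution and take the standard deviation over all samples (varss) as the spread.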
+ for ii in range(self.n_dropout): + outputs = self.model(inputs) + outputs = self.unnormalize_bi(outputs) + samples = self.laplace_sampling(outputs, self.n_samples) + total_outputs = torch.cat((total_outputs, samples), 0) + varss = total_outputs.std(0) + else: + varss = [0] + + # Don't use dropout for the mean prediction + self.model.dropout.training = False + + # Forward pass + outputs = self.model(inputs) + + if not self.baseline: + outputs = self.unnormalize_bi(outputs) + + avg_distance = float(self.criterion_eval(outputs[:, 0:1], labels).item()) + + dic_err[phase]['all'] = self.compute_stats(outputs, labels, varss, dic_err[phase]['all'], size_eval) + + print('-'*120) + self.logger.info("\nAverage distance on the {} set: {:.2f}" + .format(phase, dic_err[phase]['all']['mean'])) + + self.logger.info("Aleatoric Uncertainty: {:.2f}, inside the interval: {:.1f}%" + .format(dic_err[phase]['all']['bi'], dic_err[phase]['all']['conf_bi']*100)) + + # TODO Add evaluation variance + # self.logger.info("Combined Uncertainty: {:.2f} with a max of {:.2f}, inside the interval: {:.1f}%\n" + # .format(stat_var[0], stat_var[1], stat_var[2]*100)) + + # Evaluate performances on different clusters and save statistics + + nuscenes = NuScenesDataset(self.joints, phase=phase) + for clst in self.clusters: + inputs, labels, size_eval = nuscenes.get_cluster_annotations(clst) + inputs, labels = inputs.to(self.device), labels.to(self.device) + + # Forward pass on each cluster + outputs = self.model(inputs) + if not self.baseline: + outputs = self.unnormalize_bi(outputs) + + dic_err[phase][clst] = self.compute_stats(outputs, labels, [0], dic_err[phase][clst], size_eval) + + self.logger.info("{} error in cluster {} = {:.2f} for {} instances. " + "Aleatoric of {:.2f} with {:.1f}% inside the interval" + .format(phase, clst, dic_err[phase][clst]['mean'], size_eval, + dic_err[phase][clst]['bi'], dic_err[phase][clst]['conf_bi'] * 100)) + + # Save the model and the results + if self.save and not load: + torch.save(self.model.state_dict(), self.path_model) + print('-'*120) + self.logger.info("model saved: {} \n".format(self.path_model)) + else: + self.logger.info("model not saved\n") + + return dic_err, self.model + + def compute_stats(self, outputs, labels_orig, varss, dic_err, size_eval): + """Compute mean std (aleatoric) and max of torch tensors""" + + labels = labels_orig.view(-1, ) + mean_mu = float(self.criterion_eval(outputs[:, 0], labels).item()) + max_mu = float(torch.max(torch.abs((outputs[:, 0] - labels))).item()) + + if self.baseline: + return (mean_mu, max_mu), (0, 0, 0) + + else: + mean_bi = torch.mean(outputs[:, 1]).item() + max_bi = torch.max(outputs[:, 1]).item() + + low_bound_bi = labels >= (outputs[:, 0] - outputs[:, 1]) + up_bound_bi = labels <= (outputs[:, 0] + outputs[:, 1]) + bools_bi = low_bound_bi & up_bound_bi + conf_bi = float(torch.sum(bools_bi)) / float(bools_bi.shape[0]) + + if varss[0] == 0: + aa = 5 + + else: + mean_var = torch.mean(varss).item() + max_var = torch.max(varss).item() + + low_bound_var = labels >= (outputs[:, 0] - varss) + up_bound_var = labels <= (outputs[:, 0] + varss) + bools_var = low_bound_var & up_bound_var + conf_var = float(torch.sum(bools_var)) / float(bools_var.shape[0]) + + dic_err['mean'] += mean_mu * (outputs.size(0) / size_eval) + dic_err['bi'] += mean_bi * (outputs.size(0) / size_eval) + dic_err['count'] += (outputs.size(0) / size_eval) + dic_err['conf_bi'] += conf_bi * (outputs.size(0) / size_eval) + + return dic_err diff --git a/src/predict/predict_2d_3d.py 
b/src/predict/predict_2d_3d.py new file mode 100644 index 0000000..bf0d637 --- /dev/null +++ b/src/predict/predict_2d_3d.py @@ -0,0 +1,168 @@ +"""Predict poses for given images.""" + +import argparse +import glob +import json +import os + +import numpy as np +import torch + +from openpifpaf.network import nets +from openpifpaf import decoder, show +from openpifpaf import transforms +from predict.predict_monoloco import PredictMonoLoco +from utils.pifpaf import preprocess_pif + +import torchvision +import torch + +from PIL import Image, ImageFile + + +class ImageList(torch.utils.data.Dataset): + def __init__(self, image_paths, scale, image_transform=None): + self.image_paths = image_paths + self.image_transform = image_transform or transforms.image_transform + self.scale = scale + + # data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2 + # .0) + + def __getitem__(self, index): + image_path = self.image_paths[index] + ImageFile.LOAD_TRUNCATED_IMAGES = True + with open(image_path, 'rb') as f: + image = Image.open(f).convert('RGB') + + if self.scale > 1.01 or self.scale < 0.99: + image = torchvision.transforms.functional.resize(image, + (round(self.scale * image.size[1]), + round(self.scale * image.size[0])), + interpolation=Image.BICUBIC) + original_image = torchvision.transforms.functional.to_tensor(image) + image = self.image_transform(image) + + return image_path, original_image, image + + def __len__(self): + return len(self.image_paths) + + +def elaborate_cli(args): + + # Merge the model_pifpaf argument + if not args.checkpoint: + args.checkpoint = args.model_pifpaf + # glob + if args.glob: + args.images += glob.glob(args.glob) + if not args.images: + raise Exception("no image files given") + + # add args.device + args.device = torch.device('cpu') + args.pin_memory = False + if torch.cuda.is_available(): + args.device = torch.device('cuda') + args.pin_memory = True + + # Add num_workers + args.loader_workers = 8 + + # Add visualization defaults + args.figure_width = 10 + args.dpi_factor = 1.0 + + return args + + +def predict(args): + + elaborate_cli(args) + + # load model + model, _ = nets.factory_from_args(args) + model = model.to(args.device) + processor = decoder.factory_from_args(args, model) + + # data + data = ImageList(args.images, scale=args.scale) + data_loader = torch.utils.data.DataLoader( + data, batch_size=1, shuffle=False, + pin_memory=args.pin_memory, num_workers=args.loader_workers) + + # Visualizer + keypoint_painter = show.KeypointPainter(show_box=True) + skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True, + markersize=1, linewidth=4) + + keypoints_whole = [] + for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader): + images = image_tensors.permute(0, 2, 3, 1) + + processed_images = processed_images_cpu.to(args.device, non_blocking=True) + fields_batch = processor.fields(processed_images) + + # unbatch + for image_path, image, processed_image_cpu, fields in zip( + image_paths, + images, + processed_images_cpu, + fields_batch): + + if args.output_directory is None: + output_path = image_path + else: + file_name = os.path.basename(image_path) + output_path = os.path.join(args.output_directory, file_name) + print('image', idx, image_path, output_path) + + processor.set_cpu_image(image, processed_image_cpu) + keypoint_sets, scores = processor.keypoint_sets(fields) + + # Correct to not change the confidence + scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3) + + 
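+ # Each detection becomes a dict with 51 'keypoints' values (x, y, confidence for the 17 COCO joints; x and y are divided by the scale to map back to the original resolution, confidences are untouched)
+ # and a 'bbox' [x_min, y_min, x_max, y_max] taken from the keypoint extremes. +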
pifpaf_out = [ + {'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(), + 'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale, + np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]} + for kps in keypoint_sets + ] + + # Save json file + if 'pifpaf' in args.networks: + + if 'json' in args.output_types and keypoint_sets.size > 0: + with open(output_path + '.pifpaf.json', 'w') as f: + json.dump(pifpaf_out, f) + + if keypoint_sets.size > 0: + keypoints_whole.append(np.around((keypoint_sets / scale_np), 1) + .reshape(keypoint_sets.shape[0], -1).tolist()) + + if 'keypoints' in args.output_types: + with show.image_canvas(image, + output_path + '.keypoints.png', + show=args.show, + fig_width=args.figure_width, + dpi_factor=args.dpi_factor) as ax: + keypoint_painter.keypoints(ax, keypoint_sets) + + if 'skeleton' in args.output_types: + with show.image_canvas(image, + output_path + '.skeleton.png', + show=args.show, + fig_width=args.figure_width, + dpi_factor=args.dpi_factor) as ax: + skeleton_painter.keypoints(ax, keypoint_sets, scores=scores) + + if 'monoloco' in args.networks: + im_size = (float(image.size()[1] / args.scale), + float(image.size()[0] / args.scale)) # Width, Height (original) + boxes, keypoints = preprocess_pif(pifpaf_out, im_size) + predict_monoloco = PredictMonoLoco(boxes, keypoints, image_path, output_path, args) + predict_monoloco.run() + + return keypoints_whole diff --git a/src/predict/predict_monoloco.py b/src/predict/predict_monoloco.py new file mode 100644 index 0000000..90e99db --- /dev/null +++ b/src/predict/predict_monoloco.py @@ -0,0 +1,196 @@ + +""" +From a json file output images and json annotations +""" + +import torch +import sys +import numpy as np +from collections import defaultdict +import os +import json +import logging +import time +from models.architectures import LinearModel + + +class PredictMonoLoco: + + def __init__(self, boxes, keypoints, image_path, output_path, args): + + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger(__name__) + basename, _ = os.path.splitext(os.path.basename(image_path)) + + # Check for ground-truth file + try: + with open(args.path_gt, 'r') as f: + self.dic_names = json.load(f) + except FileNotFoundError: + self.dic_names = None + + print('-' * 120) + print("Monoloco: ground truth file not found") + print('-' * 120) + + # Check for calibration file + if not args.no_calib: + self.path_calib = os.path.join(args.dir_calib, basename + '.txt') + assert os.path.isfile(self.path_calib), "Calibration file not found. 
Change dir or use --no_calib" + + self.boxes = boxes + self.keypoints = keypoints + self.image_path = image_path + self.output_path = output_path + self.device = args.device + self.draw_kps = args.draw_kps + self.y_scale = args.y_scale # y_scale = 1.85 for kitti combined + self.no_calib = args.no_calib + self.z_max = args.z_max + self.output_types = args.output_types + self.show = args.show + output_size = 2 + self.n_samples = 100 + self.n_dropout = args.n_dropout + if self.n_dropout > 0: + self.epistemic = True + else: + self.epistemic = False + self.iou_min = 0.25 + + # load the model + input_size = 17 * 2 + + # self.model = TriLinear(input_size=input_size, output_size=output_size, p_dropout=dropout) + self.model = LinearModel(input_size=input_size, output_size=output_size) + self.model.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage)) + self.model.eval() # Default is train + self.model.to(self.device) + + # Import + from utils.camera import preprocess_single, get_keypoints, get_depth + self.preprocess_single = preprocess_single + self.get_keypoints = get_keypoints + self.get_depth = get_depth + + from utils.misc import epistemic_variance, laplace_sampling, get_idx_max + self.epistemic_variance = epistemic_variance + self.laplace_sampling = laplace_sampling + self.get_idx_max = get_idx_max + from visuals.printer import Printer + self.Printer = Printer + + from utils.normalize import unnormalize_bi + self.unnormalize_bi = unnormalize_bi + + from utils.kitti import get_simplified_calibration, get_calibration + self.get_simplified_calibration = get_simplified_calibration + self.get_calibration = get_calibration + + from utils.pifpaf import get_input_data + self.get_input_data = get_input_data + + def run(self): + + cnt = 0 + + if self.no_calib: + # kk = [[1266.4, 0., 816.27], [0, 1266.4, 491.5], [0., 0., 1.]] + kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] + + else: + kk, tt = self.get_calibration(self.path_calib) + # kk = self.get_simplified_calibration(self.path_calib) + + # kk = [[1266.4, 0., 816.27], [0, 1266.4, 491.5], [0., 0., 1.]] + # self.factor = 1 + # self.y_scale = 1 + (inputs_norm, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = \ + self.get_input_data(self.boxes, self.keypoints, kk, left_to_right=True) + + # Conversion into torch tensor + if len(inputs_norm) > 0: + with torch.no_grad(): + inputs = torch.from_numpy(np.array(inputs_norm)).float() + inputs = inputs.to(self.device) + # self.model.to("cpu") + start = time.time() + # Manually reactivate dropout in eval + self.model.dropout.training = True + total_outputs = torch.empty((0, len(xy_kps))).to(self.device) + + if self.n_dropout > 0: + for ii in range(self.n_dropout): + outputs = self.model(inputs) + outputs = self.unnormalize_bi(outputs) + samples = self.laplace_sampling(outputs, self.n_samples) + total_outputs = torch.cat((total_outputs, samples), 0) + varss = total_outputs.std(0) + else: + varss = [0] * len(inputs_norm) + + # # Don't use dropout for the mean prediction + start_single = time.time() + self.model.dropout.training = False + outputs = self.model(inputs) + outputs = self.unnormalize_bi(outputs) + end = time.time() + print("Total Forward pass time = {:.2f} ms".format((end-start) * 1000)) + print("Single pass time = {:.2f} ms".format((end - start_single) * 1000)) + + # Print image and save json + dic_out = defaultdict(list) + name = os.path.basename(self.image_path) + if self.dic_names: + boxes_gt, dds_gt = self.dic_names[name]['boxes'], 
self.dic_names[name]['dds'] + + for idx, box in enumerate(uv_boxes): + dd_pred = float(outputs[idx][0]) + ale = float(outputs[idx][1]) + var_y = float(varss[idx]) + + # Find the corresponding ground truth if available + if self.dic_names: + + idx_max, iou_max = self.get_idx_max(box, boxes_gt) + if iou_max > self.iou_min: + dd_real = dds_gt[idx_max] + boxes_gt.pop(idx_max) + dds_gt.pop(idx_max) + + # In case of no matching + else: + dd_real = 0 + # In case of no ground truth + else: + dd_real = dd_pred + + uv_center = uv_centers[idx] + xyz_real = self.get_depth(uv_center, kk, dd_real) + xyz_pred = self.get_depth(uv_center, kk, dd_pred) + + dic_out['boxes'].append(box) + dic_out['dds_real'].append(dd_real) + dic_out['dds_pred'].append(dd_pred) + dic_out['stds_ale'].append(ale) + dic_out['stds_epi'].append(var_y) + dic_out['xyz_real'].append(xyz_real) + dic_out['xyz_pred'].append(xyz_pred) + dic_out['xy_kps'].append(xy_kps[idx]) + dic_out['uv_kps'].append(uv_kps[idx]) + dic_out['uv_centers'].append(uv_center) + dic_out['uv_shoulders'].append(uv_shoulders[idx]) + + if any((xx in self.output_types for xx in ['front', 'bird', 'combined'])): + + printer = self.Printer(self.image_path, self.output_path, dic_out, kk, + y_scale=self.y_scale, output_types=self.output_types, + show=self.show, z_max=self.z_max, epistemic=self.epistemic) + printer.print() + + if 'json' in self.output_types: + + with open(os.path.join(self.output_path + '.monoloco.json'), 'w') as ff: + json.dump(dic_out, ff) + + sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t') diff --git a/src/utils/camera.py b/src/utils/camera.py new file mode 100644 index 0000000..d188fc6 --- /dev/null +++ b/src/utils/camera.py @@ -0,0 +1,202 @@ + +import numpy as np +import math + + +def pixel_to_camera(uv1, kk, z_met): + """ + (3,) array --> (3,) array + Convert a point in pixel coordinate to absolute camera coordinates + """ + + kk_1 = np.linalg.inv(kk) + xyz_met_norm = np.dot(kk_1, uv1) + xyz_met = xyz_met_norm * z_met + return xyz_met + + +def project_to_pixels(xyz, kk): + """Project a single point in space into the image""" + xx, yy, zz = np.dot(kk, xyz) + uu = int(xx / zz) + vv = int(yy / zz) + + return uu, vv + + +def project_3d(box_obj, kk): + """ + Project a 3D bounding box into the image plane using the central corners + """ + + box_2d = [] + + # Obtain the 3d points of the box + xc, yc, zc = box_obj.center + ww, ll, hh, = box_obj.wlh + + # Points corresponding to a box at the z of the center + x1 = xc - ww/2 + y1 = yc - hh/2 # Y axis directed below + x2 = xc + ww/2 + y2 = yc + hh/2 + xyz1 = np.array([x1, y1, zc]) + xyz2 = np.array([x2, y2, zc]) + corners_3d = np.array([xyz1, xyz2]) + + # Project them and convert into pixel coordinates + for xyz in corners_3d: + xx, yy, zz = np.dot(kk, xyz) + uu = xx / zz + vv = yy / zz + box_2d.append(uu) + box_2d.append(vv) + + return box_2d + + +def preprocess_single(kps, kk): + + """ Preprocess input of a single annotations + Input_kps = list of 4 elements with 0=x, 1=y, 2= confidence, 3 = ? 
in pixels + Output_kps = [x0, y0, x1,...x15, y15] in meters normalized (z=1) and zero-centered using the center of the box + """ + + kps_uv = [] + kps_0c = [] + kps_orig = [] + + # Create center of the bounding box using min max of the keypoints + uu_c, vv_c = get_keypoints(kps[0], kps[1], mode='center') + uv_center = np.array([uu_c, vv_c, 1]) + + # Create a list of single arrays of (u, v, 1) + for idx, _ in enumerate(kps[0]): + uv_kp = np.array([kps[0][idx], kps[1][idx], 1]) + kps_uv.append(uv_kp) + + # Take the ground joint + vv_gr = max(kps[1]) + + # Projection in normalized image coordinates and zero-center with the center of the bounding box + xy1_center = pixel_to_camera(uv_center, kk, 1) * 10 + for idx, kp in enumerate(kps_uv): + kp_proj = pixel_to_camera(kp, kk, 1) * 10 + kp_proj_0c = kp_proj - xy1_center + kps_0c.append(float(kp_proj_0c[0])) + kps_0c.append(float(kp_proj_0c[1])) + + kp_orig = pixel_to_camera(kp, kk, 1) + kps_orig.append(float(kp_orig[0])) + kps_orig.append(float(kp_orig[1])) + + + # Append the y of the ground foot to the keypoints + # kp_gr = np.array([0, vv_gr, 1]) + # xy1_gr = pixel_to_camera(kp_gr, kk, 1) + # kps_0c.append(float(xy1_gr[1])) + + return kps_0c, kps_orig + + +def get_keypoints(kps_0, kps_1, mode): + """Get the center of 2 lists""" + + assert mode == 'center' or mode == 'shoulder' or mode == 'hip' + + if mode == 'center': + uu = (max(kps_0) - min(kps_0)) / 2 + min(kps_0) + vv = (max(kps_1) - min(kps_1)) / 2 + min(kps_1) + + elif mode == 'shoulder': + uu = float(np.average(kps_0[5:7])) + vv = float(np.average(kps_1[5:7])) + + elif mode == 'hip': + uu = float(np.average(kps_0[11:13])) + vv = float(np.average(kps_1[11:13])) + + return uu, vv + + +def transform_kp(kps, tr_mode): + """Apply different transformations to the keypoints based on the tr_mode""" + + assert tr_mode == "None" or tr_mode == "singularity" or tr_mode == "upper" or tr_mode == "lower" \ + or tr_mode == "horizontal" or tr_mode == "vertical" or tr_mode == "lateral" \ + or tr_mode == 'shoulder' or tr_mode == 'knee' or tr_mode == 'upside' or tr_mode == 'falling' \ + or tr_mode == 'random' + + uu_c, vv_c = get_keypoints(kps[0], kps[1], mode='center') + + if tr_mode == "None": + return kps + + elif tr_mode == "singularity": + uus = [uu_c for uu in kps[0]] + vvs = [vv_c for vv in kps[1]] + + elif tr_mode == "vertical": + uus = [uu_c for uu in kps[0]] + vvs = kps[1] + + elif tr_mode == 'horizontal': + uus = kps[0] + vvs = [vv_c for vv in kps[1]] + + elif tr_mode == 'lower': + uus = kps[0] + vvs = kps[1][:9] + [vv_c for vv in kps[1][9:]] + + elif tr_mode == 'upper': + uus = kps[0] + vvs = [vv_c for vv in kps[1][:9]] + kps[1][9:] + + elif tr_mode == 'lateral': + uus = [] + for idx, kp in enumerate(kps[0]): + if idx % 2 == 1: + uus.append(kp) + else: + uus.append(uu_c) + vvs = kps[1] + + elif tr_mode == 'shoulder': + uus = kps[0] + vvs = kps[1][:7] + [kps[1][6] for vv in kps[1][7:]] + + elif tr_mode == 'knee': + uus = kps[0] + vvs = [kps[1][14] for vv in kps[1][:13]] + kps[1][13:] + + elif tr_mode == 'up': + uus = kps[0] + vvs = [kp - 300 for kp in kps[1]] + + elif tr_mode == 'falling': + uus = [kps[0][16] - kp + kps[1][16] for kp in kps[1]] + vvs = [kps[1][16] - kp + kps[0][16] for kp in kps[0]] + + elif tr_mode == 'random': + uu_min = min(kps[0]) + uu_max = max(kps[0]) + vv_min = min(kps[1]) + vv_max = max(kps[1]) + np.random.seed(6) + uus = np.random.uniform(uu_min, uu_max, len(kps[0])).tolist() + vvs = np.random.uniform(vv_min, vv_max, len(kps[1])).tolist() + + return [uus, vvs, kps[2], 
[]] + + +def get_depth(uv_center, kk, dd): + + if len(uv_center) == 2: + uv_center.extend([1]) + uv_center_np = np.array(uv_center) + xyz_norm = pixel_to_camera(uv_center, kk, 1) + zz = dd / math.sqrt(1 + xyz_norm[0] ** 2 + xyz_norm[1] ** 2) + + xyz = pixel_to_camera(uv_center_np, kk, zz).tolist() + + return xyz \ No newline at end of file diff --git a/src/utils/kitti.py b/src/utils/kitti.py new file mode 100644 index 0000000..3d7703e --- /dev/null +++ b/src/utils/kitti.py @@ -0,0 +1,125 @@ + +import numpy as np +import copy + +from utils.camera import pixel_to_camera, get_keypoints +from eval.geom_baseline import compute_distance_single + + +def eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48): + """ + Evaluate geometric distance + """ + xy_centers = [] + dds_geom = [] + for idx, _ in enumerate(uv_centers): + uv_center = copy.deepcopy(uv_centers[idx]) + uv_center.append(1) + uv_shoulder = copy.deepcopy(uv_shoulders[idx]) + uv_shoulder.append(1) + uv_kp = uv_kps[idx] + xy_center = pixel_to_camera(uv_center, kk, 1) + xy_centers.append(xy_center.tolist()) + + uu_2, vv_2 = get_keypoints(uv_kp[0], uv_kp[1], mode='hip') + uv_hip = [uu_2, vv_2, 1] + + zz, _ = compute_distance_single(uv_shoulder, uv_hip, kk, average_y) + xyz_center = np.array([xy_center[0], xy_center[1], zz]) + dd_geom = float(np.linalg.norm(xyz_center)) + dds_geom.append(dd_geom) + + return dds_geom, xy_centers + + +def get_calibration(path_txt): + """Read calibration parameters from txt file: + For the left color camera we use P2 which is K * [I|t] + + P = [fu, 0, x0, fu*t1-x0*t3 + 0, fv, y0, fv*t2-y0*t3 + 0, 0, 1, t3] + + check also http://ksimek.github.io/2013/08/13/intrinsic/ + + Simple case test: + xyz = np.array([2, 3, 30, 1]).reshape(4, 1) + xyz_2 = xyz[0:-1] + tt + uv_temp = np.dot(kk, xyz_2) + uv_1 = uv_temp / uv_temp[-1] + kk_1 = np.linalg.inv(kk) + xyz_temp2 = np.dot(kk_1, uv_1) + xyz_new_2 = xyz_temp2 * xyz_2[2] + xyz_fin_2 = xyz_new_2 - tt + """ + + with open(path_txt, "r") as ff: + file = ff.readlines() + p2_str = file[2].split()[1:] + p2_list = [float(xx) for xx in p2_str] + p2 = np.array(p2_list).reshape(3, 4) + + kk = p2[:, :-1] + f_x = kk[0, 0] + f_y = kk[1, 1] + x0 = kk[2, 0] + y0 = kk[2, 1] + aa = p2[0, 3] + bb = p2[1, 3] + t3 = p2[2, 3] + t1 = (aa - x0*t3) / f_x + t2 = (bb - y0*t3) / f_y + tt = np.array([t1, t2, t3]).reshape(3, 1) + return kk, tt + + +def get_simplified_calibration(path_txt): + + with open(path_txt, "r") as ff: + file = ff.readlines() + + for line in file: + if line[:4] == 'K_02': + kk_str = line[4:].split()[1:] + kk_list = [float(xx) for xx in kk_str] + kk = np.array(kk_list).reshape(3, 3).tolist() + return kk + + raise ValueError('Matrix K_02 not found in the file') + + +def check_conditions(line, mode, thresh=0.5): + + """Check conditions of our or m3d txt file""" + + check = False + assert mode == 'gt' or mode == 'm3d' or mode == '3dop' or mode == 'our', "Type not recognized" + + if mode == 'm3d' or mode == '3dop': + conf = line.split()[15] + if line[:10] == 'pedestrian' and float(conf) >= thresh: + check = True + + elif mode == 'gt': + if line[:10] == 'Pedestrian': + check = True + + elif mode == 'our': + if line[10] >= thresh: + check = True + + return check + + +def get_category(box, trunc, occ): + + hh = box[3] - box[1] + + if hh >= 40 and trunc <= 0.15 and occ <= 0: + cat = 'easy' + elif trunc <= 0.3 and occ <= 1: + cat = 'moderate' + else: + cat = 'hard' + + return cat diff --git a/src/utils/misc.py b/src/utils/misc.py new file mode 100644 index 0000000..3f11c96 
--- /dev/null +++ b/src/utils/misc.py @@ -0,0 +1,207 @@ + +import numpy as np +import torch +import time +import logging +# from shapely.geometry import box as Sbox + +def set_logger(log_path): + """Set the logger to log info in terminal and file `log_path`. + ``` + logging.info("Starting training...") + ``` + Args: + log_path: (string) where to log + """ + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + + # Logging to a file + file_handler = logging.FileHandler(log_path) + file_handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s')) + logger.addHandler(file_handler) + + # Logging to console + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(logging.Formatter('%(message)s')) + logger.addHandler(stream_handler) + + return logger + + +def calculate_iou(box1, box2): + + # Calculate the (x1, y1, x2, y2) coordinates of the intersection of box1 and box2. Calculate its Area. + xi1 = max(box1[0], box2[0]) + yi1 = max(box1[1], box2[1]) + xi2 = min(box1[2], box2[2]) + yi2 = min(box1[3], box2[3]) + inter_area = max((xi2 - xi1), 0) * max((yi2 - yi1), 0) # Max keeps into account not overlapping box + + # Calculate the Union area by using Formula: Union(A,B) = A + B - Inter(A,B) + box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) + box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1]) + union_area = box1_area + box2_area - inter_area + + # compute the IoU + iou = inter_area / union_area + + return iou + + +def get_idx_max(box, boxes_gt): + + """Compute and save errors between a single box and the gt box which match""" + + iou_max = 0 + idx_max = 0 + for idx_gt, box_gt in enumerate(boxes_gt): + iou = calculate_iou(box, box_gt) + if iou > iou_max: + idx_max = idx_gt + iou_max = iou + + return idx_max, iou_max + + +def reparametrize_box3d(box): + """Reparametrized 3D box in the XZ plane and add the height""" + + hh, ww, ll = box[0:3] + x_c, y_c, z_c = box[3:6] + + x1 = x_c - ll/2 + z1 = z_c - ww/2 + x2 = x_c + ll/2 + z2 = z_c + ww / 2 + + return [x1, z1, x2, z2, hh] + + +# def calculate_iou3d(box3d_1, box3d_2): +# """3D intersection over union. 
Boxes are parametrized as x1, z1, x2, z2, hh +# We compute 2d iou in the birds plane and then add a factor for height differences (0-1)""" +# +# poly1 = Sbox(box3d_1[0], box3d_1[1], box3d_1[2], box3d_1[3]) +# poly2 = Sbox(box3d_2[0], box3d_2[1], box3d_2[2], box3d_2[3]) +# +# inter_2d = poly1.intersection(poly2).area +# union_2d = poly1.area + poly2.area - inter_2d +# +# # height_factor = 1 - abs(box3d_1[4] - box3d_2[4]) / max(box3d_1[4], box3d_2[4]) +# +# # +# iou_3d = inter_2d / union_2d # * height_factor +# +# return iou_3d + + +def laplace_sampling(outputs, n_samples): + + # np.random.seed(1) + t0 = time.time() + mu = outputs[:, 0] + bi = torch.abs(outputs[:, 1]) + + + # Analytical + # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0]) + # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu)) + + # Sampling + cuda_check = outputs.is_cuda + + if cuda_check: + get_device = outputs.get_device() + device = torch.device(type="cuda", index=get_device) + else: + device = torch.device("cpu") + t1 = time.time() + + xxs = torch.empty((0, mu.shape[0])).to(device) + t2 = time.time() + + laplace = torch.distributions.Laplace(mu, bi) + t3 = time.time() + for ii in range(1): + xx = laplace.sample((n_samples,)) + t4a = time.time() + xxs = torch.cat((xxs, xx.view(n_samples, -1)), 0) + t4 = time.time() + + # time_tot = t4 - t0 + # time_1 = t1 - t0 + # time_2 = t2 - t1 + # time_3 = t3 - t2 + # time_4a = t4a - t3 + # time_4 = t4 - t3 + # print("Time 1: {:.1f}%".format(time_1 / time_tot * 100)) + # print("Time 2: {:.1f}%".format(time_2 / time_tot * 100)) + # print("Time 3: {:.1f}%".format(time_3 / time_tot * 100)) + # print("Time 4a: {:.1f}%".format(time_4a / time_tot * 100)) + # print("Time 4: {:.1f}%".format(time_4 / time_tot * 100)) + return xxs + + +def epistemic_variance(total_outputs): + """Compute epistemic variance""" + + # var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2 + var_y = np.var(total_outputs, axis=0) + lb = np.quantile(a=total_outputs, q=0.25, axis=0) + up = np.quantile(a=total_outputs, q=0.75, axis=0) + var_new = (up-lb) + + return var_y, var_new + + +def append_cluster(dic_jo, phase, xx, dd, kps): + + """Append the annotation based on its distance""" + + # if dd <= 6: + # dic_jo[phase]['clst']['6']['kps'].append(kps) + # dic_jo[phase]['clst']['6']['X'].append(xx) + # dic_jo[phase]['clst']['6']['Y'].append([dd]) # Trick to make it (nn,1) instead of (nn, ) + + if dd <= 10: + dic_jo[phase]['clst']['10']['kps'].append(kps) + dic_jo[phase]['clst']['10']['X'].append(xx) + dic_jo[phase]['clst']['10']['Y'].append([dd]) + + # elif dd <= 15: + # dic_jo[phase]['clst']['15']['kps'].append(kps) + # dic_jo[phase]['clst']['15']['X'].append(xx) + # dic_jo[phase]['clst']['15']['Y'].append([dd]) + + elif dd <= 20: + dic_jo[phase]['clst']['20']['kps'].append(kps) + dic_jo[phase]['clst']['20']['X'].append(xx) + dic_jo[phase]['clst']['20']['Y'].append([dd]) + + # elif dd <= 25: + # dic_jo[phase]['clst']['25']['kps'].append(kps) + # dic_jo[phase]['clst']['25']['X'].append(xx) + # dic_jo[phase]['clst']['25']['Y'].append([dd]) + + elif dd <= 30: + dic_jo[phase]['clst']['30']['kps'].append(kps) + dic_jo[phase]['clst']['30']['X'].append(xx) + dic_jo[phase]['clst']['30']['Y'].append([dd]) + + # elif dd <= 40: + # dic_jo[phase]['clst']['40']['kps'].append(kps) + # dic_jo[phase]['clst']['40']['X'].append(xx) + # dic_jo[phase]['clst']['40']['Y'].append([dd]) + # + # elif dd <= 50: + # dic_jo[phase]['clst']['50']['kps'].append(kps) + # 
dic_jo[phase]['clst']['50']['X'].append(xx) + # dic_jo[phase]['clst']['50']['Y'].append([dd]) + + else: + dic_jo[phase]['clst']['>30']['kps'].append(kps) + dic_jo[phase]['clst']['>30']['X'].append(xx) + dic_jo[phase]['clst']['>30']['Y'].append([dd]) + + diff --git a/src/utils/normalize.py b/src/utils/normalize.py new file mode 100644 index 0000000..3f0aca9 --- /dev/null +++ b/src/utils/normalize.py @@ -0,0 +1,107 @@ + +import torch +import numpy as np + + +def normalize_labels(labels, dic_norm): + """Apply the log to remove right-skewness and then apply Gaussian Normalization""" + + labels_new = torch.div(torch.log(labels) - dic_norm['mean']['Y'], dic_norm['std']['Y']) + return labels_new + + +def unnormalize_labels(labels, dic_norm): + """Apply the log to remove right-skewness and then apply Gaussian Normalization""" + + return torch.exp(labels * dic_norm['std']['Y'] + dic_norm['mean']['Y']) + + +def unnormalize_outputs(outputs, dic_norm, dim, normalize): + """Unnnormalize outputs, with an option to only pass from si to bi""" + + if dim == 1: + assert normalize, "Inconsistency" + return unnormalize_labels(outputs, dic_norm) + + else: + mu = outputs[:, 0:1] + si = outputs[:, 1:2] + bi = torch.exp(si) + + if normalize: + mu = unnormalize_labels(mu, dic_norm) + bi = torch.exp(bi * dic_norm['std']['Y']) + + return torch.cat((mu, bi), 1) + + +def unnormalize_bi(outputs): + """Unnormalize relative bi of a nunmpy array""" + + outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0] + return outputs + + +# def normalize_arrays_jo(dic_jo): +# """Normalize according to the mean and std of each keypoint in the training dataset +# PS normalization of training also for test and val""" +# +# # Normalize +# phase = 'train' +# kps_orig_tr = np.array(dic_jo[phase]['X']) +# # dd_orig_tr = np.array(dic_jo[phase]['Y']).reshape(-1, 1) +# kps_mean = np.mean(kps_orig_tr, axis=0) +# plt.hist(kps_orig_tr, bins=100) +# plt.show() +# kps_std = np.std(kps_orig_tr, axis=0) +# # dd_mean = np.mean(dd_orig_tr, axis=0) +# # dd_std = np.std(dd_orig_tr, axis=0) +# +# for phase in dic_jo: +# +# # Compute the normalized arrays +# kps_orig = np.array(dic_jo[phase]['X']) +# dd_orig = np.array(dic_jo[phase]['Y']).reshape(-1, 1) +# kps_norm = np.divide((kps_orig - kps_mean), kps_std) +# +# # dd_norm = np.divide((dd_orig - dd_mean), dd_std) # ! No normalization on the output +# +# # Substitute the new values in the dictionary and save the mean and std +# dic_jo[phase]['X'] = kps_norm.tolist() +# dic_jo[phase]['mean']['X'] = kps_mean.tolist() +# dic_jo[phase]['std']['X'] = kps_std.tolist() +# +# dic_jo[phase]['Y'] = dd_orig.tolist() +# # dic_jo[phase]['mean']['Y'] = float(dd_mean) +# # dic_jo[phase]['std']['Y'] = float(dd_std) +# +# # Normalize all the clusters +# for clst in dic_jo[phase]['clst']: +# +# # Extract +# kps_orig = np.array(dic_jo[phase]['clst'][clst]['X']) +# dd_orig = np.array(dic_jo[phase]['clst'][clst]['Y']).reshape(-1, 1) +# # Normalize +# kps_norm = np.divide((kps_orig - kps_mean), kps_std) +# +# # dd_norm = np.divide((dd_orig - dd_mean), dd_std) #! 
No normalization on the output +# +# # Put back +# dic_jo[phase]['clst'][clst]['X'] = kps_norm.tolist() +# dic_jo[phase]['clst'][clst]['Y'] = dd_orig.tolist() +# +# return dic_jo +# +# +# def check_cluster_dim(dic_jo): +# """ Check that the sum of the clusters corresponds to all annotations""" +# +# for phase in ['train', 'val', 'test']: +# cnt_clst = 0 +# cnt_all = len(dic_jo[phase]['X']) +# for clst in dic_jo[phase]['clst']: +# cnt_clst += len(dic_jo[phase]['clst'][clst]['X']) +# assert cnt_all == cnt_clst + + + diff --git a/src/utils/nuscenes.py b/src/utils/nuscenes.py new file mode 100644 index 0000000..73acf59 --- /dev/null +++ b/src/utils/nuscenes.py @@ -0,0 +1,110 @@ + +import random +import json +import os +import numpy as np + + +def get_unique_tokens(list_fin): + """ + list of json files --> list of unique scene tokens + """ + list_token_scene = [] + + # Open one json file at a time + for name_fin in list_fin: + with open(name_fin, 'r') as f: + dict_fin = json.load(f) + + # Check if the token scene is already in the list and if not add it + if dict_fin['token_scene'] not in list_token_scene: + list_token_scene.append(dict_fin['token_scene']) + + return list_token_scene + + +def split_scenes(list_token_scene, tr, val, dir_main, save=False, load=True): + """ + Split the list according tr, val percentages (test percentage is a consequence) after shuffling the order + """ + + path_split = os.path.join(dir_main, 'scenes', 'split_scenes.json') + + if save: + random.seed(1) + random.shuffle(list_token_scene) # it shuffles in place + n_scenes = len(list_token_scene) + n_train = round(n_scenes * tr / 100) + n_val = round(n_scenes * val / 100) + list_train = list_token_scene[0: n_train] + list_val = list_token_scene[n_train: n_train + n_val] + list_test = list_token_scene[n_train + n_val:] + + dic_split = {'train': list_train, 'val': list_val, 'test': list_test} + with open(path_split, 'w') as f: + json.dump(dic_split, f) + + if load: + with open(path_split, 'r') as f: + dic_split = json.load(f) + + return dic_split + + +def select_categories(cat): + """ + Choose the categories to extract annotations from + """ + assert cat == 'person' or cat == 'all' or cat == 'car' + + if cat == 'person': + categories = ['human.pedestrian'] + + elif cat == 'all': + categories = ['human.pedestrian', + 'vehicle.bicycle', 'vehicle.motorcycle'] + + elif cat == 'car': + categories = ['vehicle'] + + return categories + + +def update_with_tokens(dict_gt, nusc, token_sd): + """ + Update with tokens corresponding to the token_sd + """ + + table_sample_data = nusc.get('sample_data', token_sd) # Extract the whole record to get the sample token + token_sample = table_sample_data['sample_token'] # Extract the sample_token from the table + table_sample = nusc.get('sample', token_sample) # Get the record of the sample + token_scene = table_sample['scene_token'] + dict_gt['token_sample_data'] = token_sd + dict_gt['token_sample'] = token_sample + dict_gt['token_scene'] = token_scene + return dict_gt + + +def update_with_box(dict_gt, box): + + bbox = np.zeros(7, ) + flag_child = False + + # Save the 3D bbox + bbox[0:3] = box.center + bbox[3:6] = box.wlh + bbox[6] = box.orientation.degrees + dict_gt['boxes'].append(bbox.tolist()) # Save as list to be serializable by a json file + if box.name == 'human.pedestrian.child': + flag_child = True + + return dict_gt, flag_child + +# def merge_gt_pif(dic_gt, im_path): +# +# # Extract ground truth boxes +# boxes_2d, _ = self.project_3d(dic_gt, mode='box_center') # Project the 3D gt 
into the image plane +# boxes_gt = [np.array([box[0][0], box[0][1], box[1][0], box[1][1]]) for box in boxes_2d] +# # Match each annotation and save them in the json file +# self.merge_gt_pif_single(dic_gt, boxes_gt, boxes_pif, keypoints, basename, +# iou_thresh=self.iou_thresh) diff --git a/src/utils/pifpaf.py b/src/utils/pifpaf.py new file mode 100644 index 0000000..de5cc56 --- /dev/null +++ b/src/utils/pifpaf.py @@ -0,0 +1,100 @@ + +import numpy as np +from utils.camera import preprocess_single, get_keypoints, pixel_to_camera + + +def preprocess_pif(annotations, im_size=None): + """ + Preprocess pif annotations: + 1. enlarge the box of 10% + 2. Constraint it inside the image (if image_size provided) + """ + + boxes = [] + keypoints = [] + + for dic in annotations: + box = dic['bbox'] + if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0) + return [], [] + + else: + kps = prepare_pif_kps(dic['keypoints']) + conf = float(np.mean(np.array(kps[2]))) + + # Add 10% for y + delta_h = (box[3] - box[1]) / 10 + delta_w = (box[2] - box[0]) / 10 + assert delta_h > 0 and delta_w > 0, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + + # Put the box inside the image + if im_size is not None: + box[0] = max(0, box[0]) + box[1] = max(0, box[1]) + box[2] = min(box[2], im_size[0]) + box[3] = min(box[3], im_size[1]) + + box.append(conf) + boxes.append(box) + keypoints.append(kps) + + return boxes, keypoints + + +def get_input_data(boxes, keypoints, kk, left_to_right=False): + inputs = [] + xy_centers = [] + uv_boxes = [] + uv_centers = [] + uv_shoulders = [] + uv_kps = [] + xy_kps = [] + + if left_to_right: # Order boxes from left to right + ordered = np.argsort([xx[0] for xx in boxes]) + + else: # Order boxes from most to less confident + confs = [] + for idx, box in enumerate(boxes): + confs.append(box[4]) + ordered = np.argsort(confs).tolist()[::-1] + + for idx in ordered: + kps = keypoints[idx] + uv_kps.append(kps) + uv_boxes.append(boxes[idx]) + + uu_c, vv_c = get_keypoints(kps[0], kps[1], "center") + uv_centers.append([round(uu_c), round(vv_c)]) + xy_center = pixel_to_camera(np.array([uu_c, vv_c, 1]), kk, 1) + xy_centers.append(xy_center) + + uu_1, vv_1 = get_keypoints(kps[0], kps[1], "shoulder") + uv_shoulders.append([round(uu_1), round(vv_1)]) + + # 2 steps of input normalization for each instance + kps_prep, kps_orig = preprocess_single(kps, kk) + inputs.append(kps_prep) + xy_kps.append(kps_orig) + + return (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) + + +def prepare_pif_kps(kps_in): + """Convert from a list of 51 to a list of 3, 17""" + + keypoints = np.array(kps_in).reshape(-1, 3).tolist() + xxs = [] + yys = [] + ccs = [] + + for kp_triple in keypoints: + xxs.append(kp_triple[0]) + yys.append(kp_triple[1]) + ccs.append(kp_triple[2]) + + return [xxs, yys, ccs] diff --git a/src/visuals/paper.py b/src/visuals/paper.py new file mode 100644 index 0000000..01b6cf4 --- /dev/null +++ b/src/visuals/paper.py @@ -0,0 +1,139 @@ + +import numpy as np +import os +import matplotlib.pyplot as plt +from matplotlib.patches import Ellipse +from visuals.printer import get_angle +from visuals.printer import get_confidence + + +def paper(): + """Print paper figures""" + dir_out = os.path.join('data', 'all_images', 'paper') + + method = True + task_error = True + + # Pull figure + if method: + fig_name = 'output_method.png' + z_max = 5 + x_max = z_max * 25 / 30 + + z_1 = 2 + z_2 = 4 + x_1 = 1 + x_2 = -2 + std_1 = 0.75 + std_2 = 1.5 + angle_1 = 
get_angle(x_1, z_1) + angle_2 = get_angle(x_2, z_2) + + (x_1_down, x_1_up), (z_1_down, z_1_up) = get_confidence(x_1, z_1, std_1) + (x_2_down, x_2_up), (z_2_down, z_2_up) = get_confidence(x_2, z_2, std_2) + # + # fig = plt.figure(0) + # ax = fig.add_subplot(1, 1, 1) + # + # ell_1 = Ellipse((x_1, z_1), width=std_1 * 2, height=0.3, angle=angle_1, color='b', fill=False) + # + # ell_2 = Ellipse((x_2, z_2), width=std_2 * 2, height=0.3, angle=angle_2, color='b', fill=False) + # + # ax.add_patch(ell_1) + # ax.add_patch(ell_2) + # plt.plot(x_1_down, z_1_down, marker='o', markersize=8, color='salmon') + # plt.plot(x_1, z_1, 'go', markersize=8) + # plt.plot(x_1_up, z_1_up, 'o', markersize=8, color='cornflowerblue') + # + # plt.plot(x_2_down, z_2_down, marker='o', markersize=8, color='salmon') + # plt.plot(x_2, z_2, 'go', markersize=8) + # plt.plot(x_2_up, z_2_up, 'bo', markersize=8, color='cornflowerblue') + # + # plt.plot([0, x_max], [0, z_max], 'k--') + # plt.plot([0, -x_max], [0, z_max], 'k--') + # plt.xticks([]) + # plt.yticks([]) + # plt.xlabel('X [m]') + # plt.ylabel('Z [m]') + # plt.show() + # plt.close() + + fig = plt.figure(1) + ax = fig.add_subplot(1, 1, 1) + + ell_3 = Ellipse((0, 2), width=std_1 * 2, height=0.3, angle=-90, color='b', fill=False, linewidth=2.5) + ell_4 = Ellipse((0, 2), width=std_1 * 3, height=0.3, angle=-90, color='r', fill=False, + linestyle='dashed', linewidth=2.5) + ax.add_patch(ell_4) + ax.add_patch(ell_3) + plt.plot(0, 2, marker='o', color='skyblue', markersize=9) + plt.plot([0, 3], [0, 4], 'k--') + plt.plot([0, -3], [0, 4], 'k--') + plt.xlim(-3, 3) + plt.ylim(0, 3.5) + plt.xticks([]) + plt.yticks([]) + plt.xlabel('X [m]') + plt.ylabel('Z [m]') + plt.savefig(os.path.join(dir_out, fig_name)) + plt.show() + plt.close() + + # Task error figure + if task_error: + plt.figure(2) + fig_name = 'task_error.png' + xx = np.linspace(0, 40, 100) + mm_male = 7 / 178 + mm_female = 7 / 165 + mm_young_male = mm_male + (178-164) / 178 + mm_young_female = mm_female + (165-156) / 165 + mm_gender = gmm() + yy_male = target_error(xx, mm_male) + yy_female = target_error(xx, mm_female) + yy_young_male = target_error(xx, mm_young_male) + yy_young_female = target_error(xx, mm_young_female) + yy_gender = target_error(xx, mm_gender) + yy_gps = np.linspace(5., 5., xx.shape[0]) + plt.grid(linewidth=0.3) + plt.plot(xx, yy_gps, color='y', label='GPS') + plt.plot(xx, yy_young_male, linestyle='dotted',linewidth=2.1, color='b', label='Adult/young male') + plt.plot(xx, yy_young_female, linestyle='dotted',linewidth=2.1, color='darkorange', label='Adult/young female') + plt.plot(xx, yy_gender, '--', color='lightgreen', linewidth=2.8, label='Generic adult (task error)') + plt.plot(xx, yy_female, '-.', linewidth=1.7, color='darkorange', label='Adult female') + plt.plot(xx, yy_male, '-.', linewidth=1.7, color='b', label='Adult male') + + plt.xlim(np.min(xx), np.max(xx)) + plt.xlabel("Distance from the camera [m]") + plt.ylabel("Localization error due to human height variation [m]") + plt.legend(loc=(0.01, 0.55)) # Location from 0 to 1 from lower left + plt.savefig(os.path.join(dir_out, fig_name)) + plt.show() + plt.close() + + + +def target_error(xx, mm): + return mm * xx + +def gmm(): + mu_men = 178 + std_men = 7 + mu_women = 165 + std_women = 7 + N_men = np.random.normal(mu_men, std_men, 100000) + N_women = np.random.normal(mu_women, std_women, 100000) + N_gmm = np.concatenate((N_men, N_women)) + mu_gmm = np.mean(N_gmm) + std_gmm = np.std(N_gmm) + # sns.distplot(N_men, hist=False, rug=False, label="Men") + 
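    # Note: with equal-weight N(178, 7) and N(165, 7) components, the mixture has
    # mean ~171.5 cm and std ~9.6 cm, so std_gmm / mu_gmm evaluates to ~0.056,
    # the same ratio hard-coded as mm_gender = 0.0556 in printer.py and results.py.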
# sns.distplot(N_women, hist=False, rug=False, label="Women") + # sns.distplot(N_gmm, hist=False, rug=False, label="GMM") + # plt.xlabel("X [cm]") + # plt.ylabel("Height distributions of men and women") + # plt.legend() + # plt.show() + print("Variace of GMM distribution: {:.2f}".format(std_gmm)) + mm_gender = std_gmm / mu_gmm + + return mm_gender \ No newline at end of file diff --git a/src/visuals/printer.py b/src/visuals/printer.py new file mode 100644 index 0000000..285cd39 --- /dev/null +++ b/src/visuals/printer.py @@ -0,0 +1,281 @@ + +import os +import math +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.cm as cm +import matplotlib +from mpl_toolkits.axes_grid1 import make_axes_locatable +from matplotlib.patches import Ellipse, Circle +import cv2 +from collections import OrderedDict +from PIL import Image + + +class Printer: + """ + Print results on images: birds eye view and computed distance + """ + + def __init__(self, image_path, output_path, dic_ann, kk, output_types, y_scale=1, show=False, + draw_kps=False, text=True, legend=True, epistemic=False, z_max=30, fig_width=10): + + self.kk = kk + self.y_scale = y_scale # 1.85 for kitti combined + self.output_types = output_types + self.show = show + self.draw_kps = draw_kps + self.text = text + self.epistemic = epistemic + self.legend = legend + self.z_max = z_max + self.fig_width = fig_width + + from utils.camera import pixel_to_camera, get_depth + self.pixel_to_camera = pixel_to_camera + self.get_depth = get_depth + + # Define the output dir + self.path_out = output_path + + # Define the birds eye view + self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']] + self.zz_gt = [xx[2] for xx in dic_ann['xyz_real']] + self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']] + self.zz_pred = [xx[2] for xx in dic_ann['xyz_pred']] + self.dds_real = dic_ann['dds_real'] + self.stds_ale = dic_ann['stds_ale'] + self.stds_ale_epi = dic_ann['stds_epi'] + self.uv_centers = dic_ann['uv_centers'] + self.uv_shoulders = dic_ann['uv_shoulders'] + self.uv_kps = dic_ann['uv_kps'] + + # Load the image + with open(image_path, 'rb') as f: + self.im = Image.open(f).convert('RGB') + + # Resize image for aesthetic proportions + if y_scale >= 1.02 or y_scale <= 0.98: + ww = self.im.size[0] + hh = self.im.size[1] + self.im = self.im.resize((ww, round(hh * y_scale))) + + self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1]) + + def print(self): + """ + Show frontal, birds-eye-view or combined visualization + With or without ground truth + Either front and/or bird visualization or combined one + """ + # Parameters + radius = 14 + radius_kps = 6 + fontsize_bv = 16 + fontsize = 18 + textcolor = 'darkorange' + color_kps = 'yellow' + + # Initialize combined + if 'combined' in self.output_types: + width = self.fig_width + 0.6 * self.fig_width + height = self.fig_width * self.im.size[1] / self.im.size[0] + fig_ar_1 = 1.7 # 1.3 before + width_ratio = 1.9 + ext = '.combined.png' + + fig, (ax1, ax0) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [1, width_ratio]}, + figsize=(width, height)) + ax1.set_aspect(fig_ar_1) + fig.set_tight_layout(True) + fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02) + + assert 'front' not in self.output_types and 'bird' not in self.output_types, \ + "--combined arguments is not supported with other visualizations" + + # Initialize front + elif 'front' in self.output_types: + width = self.fig_width + height = self.fig_width * self.im.size[1] / self.im.size[0] + + 
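            # Front-only view: a stand-alone figure whose height keeps the image aspect ratio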
plt.figure(0) + fig0, ax0 = plt.subplots(1, 1, figsize=(width, height)) + fig0.set_tight_layout(True) + + # Create front + if any(xx in self.output_types for xx in ['front', 'combined']): + + ax0.set_axis_off() + ax0.set_xlim(0, self.im.size[0]) + ax0.set_ylim(self.im.size[1], 0) + ax0.imshow(self.im) + z_min = 0 + bar_ticks = self.z_max // 5 + 1 + + cmap = cm.get_cmap('jet') + num = 0 + for idx, uv in enumerate(self.uv_shoulders): + + if self.draw_kps: + ax0 = self.show_kps(ax0, self.uv_kps[idx], self.y_scale, radius_kps, color_kps) + + elif self.zz_pred[idx] < self.z_max and 0 < self.zz_gt[idx] < self.z_max: + color = cmap((self.zz_pred[idx] % self.z_max) / self.z_max) + circle = Circle((uv[0], uv[1] * self.y_scale), radius=radius, color=color, fill=True) + ax0.add_patch(circle) + + if self.text: + ax0.text(uv[0]+radius, uv[1] * self.y_scale - radius, str(num), + fontsize=fontsize, color=textcolor, weight='bold') + num += 1 + + ax0.get_xaxis().set_visible(False) + ax0.get_yaxis().set_visible(False) + + divider = make_axes_locatable(ax0) + cax = divider.append_axes('right', size='3%', pad=0.05) + + norm = matplotlib.colors.Normalize(vmin=z_min, vmax=self.z_max) + sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm) + sm.set_array([]) + plt.colorbar(sm, ticks=np.linspace(z_min, self.z_max, bar_ticks), + boundaries=np.arange(z_min - 0.05, self.z_max + 0.1, .1), cax=cax, label='Z [m]') + # Save Front + if 'front' in self.output_types: + if self.show: + plt.show() + else: + plt.savefig(self.path_out + '.front.png', bbox_inches='tight') + + # Initialize Bird + if 'bird' in self.output_types: + plt.close() + plt.figure(1) + ext = '.bird.png' + fig1, ax1 = plt.subplots(1, 1) + fig1.set_tight_layout(True) + + # Create bird or combine it with front) + if any(xx in self.output_types for xx in ['bird', 'combined']): + im_height = 375 # TODO change it + uv_max = np.array([0, im_height, 1]) + xyz_max = self.pixel_to_camera(uv_max, self.kk, self.z_max) + x_max = abs(xyz_max[0]) # shortcut to avoid oval circles in case of different kk + + for idx, _ in enumerate(self.xx_gt): + if 0 < self.zz_gt[idx] < self.z_max: + target = get_target_error(self.dds_real[idx]) + + angle = get_angle(self.xx_gt[idx], self.zz_gt[idx]) + ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1, + angle=angle, color='lightgreen', fill=True, label="Task error") + ax1.add_patch(ellipse_real) + ax1.plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3) + + # Print prediction and the real ground truth. 
Color of prediction depends if ground truth exists + num = 0 + for idx, _ in enumerate(self.xx_pred): + if 0 < self.zz_gt[idx] < self.z_max: # only the merging ones and inside the interval + + angle = get_angle(self.xx_pred[idx], self.zz_pred[idx]) + ellipse_ale = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale[idx] * 2, + height=1, angle=angle, color='b', fill=False, label="Aleatoric Uncertainty", + linewidth=1.3) + ellipse_var = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale_epi[idx] * 2, + height=1, angle=angle, color='r', fill=False, label="Uncertainty", linewidth=1, + linestyle='--') + + ax1.add_patch(ellipse_ale) + if self.epistemic: + ax1.add_patch(ellipse_var) + + ax1.plot(self.xx_pred[idx], self.zz_pred[idx], 'ro', label="Predicted", markersize=3) + + # Plot the number + if not self.draw_kps: + (_, x_pos), (_, z_pos) = get_confidence(self.xx_pred[idx], self.zz_pred[idx], self.stds_ale_epi[idx]) + + if self.text: + ax1.text(x_pos, z_pos, str(num), fontsize=fontsize_bv, color='darkorange') + num += 1 + + # To avoid repetitions in the legend + if self.legend: + handles, labels = ax1.get_legend_handles_labels() + by_label = OrderedDict(zip(labels, handles)) + ax1.legend(by_label.values(), by_label.keys()) + + # Adding field of view + ax1.plot([0, x_max], [0, self.z_max], 'k--') + ax1.plot([0, -x_max], [0, self.z_max], 'k--') + ax1.set_ylim(0, self.z_max+1) + # ax1.set_xmin([-10, 10]) + # ax1.set_ymin([0, 20]) + # x_max_field = min(x_max, z_max / 1.55) # ! Shortcut to avoid oval circles in case of different k + # plt.axis([-x_max_field, x_max_field, 0, z_max]) + # plt.xticks([]) + # plt.title('Birds eye view') + ax1.set_xlabel("X [m]") + ax1.set_ylabel("Z [m]") + + if self.show: + plt.show() + else: + plt.savefig(self.path_out + ext, bbox_inches='tight') + + if self.draw_kps: + im = cv2.imread(self.path_out + ext) + im = self.increase_brightness(im, value=30) + hh = im.size[1] + ww = im.size[0] + im_new = im[0:hh, 0:round(ww/1.7)] + cv2.imwrite(self.path_out, im_new) + + plt.close('all') + + def show_kps(self, ax0, uv_kp_single, y_scale, radius, color): + + for idx, uu in enumerate(uv_kp_single[0]): + vv = uv_kp_single[1][idx] + circle = Circle((uu, vv * y_scale), radius=radius, color=color, fill=True) + ax0.add_patch(circle) + + return ax0 + + def increase_brightness(self, img, value=30): + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + h, s, v = cv2.split(hsv) + + lim = 255 - value + v[v > lim] = 255 + v[v <= lim] += value + + final_hsv = cv2.merge((h, s, v)) + img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR) + return img + + +def get_confidence(xx, zz, std): + """Obtain the points to plot the confidence of each annotation""" + + theta = math.atan2(zz, xx) + + delta_x = std * math.cos(theta) + delta_z = std * math.sin(theta) + + return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z) + + +def get_angle(xx, zz): + """Obtain the points to plot the confidence of each annotation""" + + theta = math.atan2(zz, xx) + angle = theta * (180 / math.pi) + + return angle + + +def get_target_error(dd): + """Get target error not knowing the gender""" + mm_gender = 0.0556 + return mm_gender * dd diff --git a/src/visuals/results.py b/src/visuals/results.py new file mode 100644 index 0000000..ac691b1 --- /dev/null +++ b/src/visuals/results.py @@ -0,0 +1,260 @@ + +import os +import time +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Ellipse + + +def print_results(dic_stats, show=False, save=False): + + """ + 
Visualize error as function of the distance on the test set and compare it with target errors based on human + height analyses. We consider: + Position error in meters due to a height variation of 7 cm (Standard deviation already knowing the sex) + Position error not knowing the gender (13cm as average difference --> 7.5cm of error to add) + """ + dir_out = 'docs' + phase = 'test' + x_min = 0 + x_max = 38 + xx = np.linspace(0, 60, 100) + mm_std = 0.04 + mm_gender = 0.0556 + excl_clusters = ['all', '50', '>50', 'easy', 'moderate', 'hard'] + clusters = tuple([clst for clst in dic_stats[phase]['our']['mean'] if clst not in excl_clusters]) + yy_gender = target_error(xx, mm_gender) + yy_gps = np.linspace(5., 5., xx.shape[0]) + + # # Recall results + # plt.figure(1) + # plt.xlabel("Distance [meters]") + # plt.ylabel("Detected instances") + # plt.xlim(x_min, x_max) + # for method in ['our', 'm3d', '3dop']: + # xxs = get_distances(clusters) + # dic_cnt = dic_stats[phase][method]['cnt'] + # cnts = get_values(dic_cnt, clusters) + # plt.plot(xxs, cnts, marker='s', label=method + '_method') + # plt.legend() + # plt.show() + # + # Precision on same instances + fig_name = 'results.png' + plt.xlabel("Distance [meters]") + plt.ylabel("Average localization error [m]") + plt.xlim(x_min, x_max) + labels = ['Mono3D', 'Geometric Baseline', 'MonoDepth', 'Our MonoLoco', '3DOP'] + mks = ['*', '^', 'p', 's', 'o'] + mksizes = [6, 6, 6, 6, 6] + lws = [1.5, 1.5, 1.5, 2.2, 1.6] + colors = ['r', 'deepskyblue', 'grey', 'b', 'darkorange'] + lstyles = ['solid', 'solid', 'solid', 'solid', 'dashdot'] + + plt.plot(xx, yy_gps, '-', label="GPS Error", color='y') + for idx, method in enumerate(['m3d_merged', 'geom_merged', 'md_merged', 'our_merged', '3dop_merged']): + dic_errs = dic_stats[phase][method]['mean'] + errs = get_values(dic_errs, clusters) + xxs = get_distances(clusters) + + plt.plot(xxs, errs, marker=mks[idx], markersize=mksizes[idx], linewidth=lws[idx], label=labels[idx], + linestyle=lstyles[idx], color=colors[idx]) + plt.plot(xx, yy_gender, '--', label="Task error", color='lightgreen', linewidth=2.5) + plt.legend(loc='upper left') + if show: + plt.show() + else: + plt.savefig(os.path.join(dir_out, fig_name)) + plt.close() + + # FIGURE SPREAD + fig_name = 'spread.png' + # fig = plt.figure(3) + # ax = fig.add_subplot(1, 1, 1) + fig, ax = plt.subplots(2, sharex=True) + plt.xlabel("Distance [m]") + plt.ylabel("Aleatoric uncertainty [m]") + ar = 0.5 # Change aspect ratio of ellipses + scale = 1.5 # Factor to scale ellipses + rec_c = 0 # Center of the rectangle + # rec_h = 2.8 # Height of the rectangle + plots_line = True + # ax[0].set_ylim([-3, 3]) + # ax[1].set_ylim([0, 3]) + # ax[1].set_ylabel("Aleatoric uncertainty [m]") + # ax[0].set_ylabel("Confidence intervals") + + dic_ale = dic_stats[phase]['our']['std_ale'] + bbs = np.array(get_values(dic_ale, clusters)) + xxs = get_distances(clusters) + yys = target_error(np.array(xxs), mm_gender) + # ale_het = tuple(bbs - yys) + # plt.plot(xxs, ale_het, marker='s', label=method) + ax[1].plot(xxs, bbs, marker='s', color='b', label="Spread b") + ax[1].plot(xxs, yys, '--', color='lightgreen', label="Task error", linewidth=2.5) + yys_up = [rec_c + ar/2 * scale * yy for yy in yys] + bbs_up = [rec_c + ar/2 * scale * bb for bb in bbs] + yys_down = [rec_c - ar/2 * scale * yy for yy in yys] + bbs_down = [rec_c - ar/2 * scale * bb for bb in bbs] + + if plots_line: + ax[0].plot(xxs, yys_up, '--', color='lightgreen', markersize=5, linewidth=1) + ax[0].plot(xxs, yys_down, '--', 
color='lightgreen', markersize=5, linewidth=1) + ax[0].plot(xxs, bbs_up, marker='s', color='b', markersize=5, linewidth=0.7) + ax[0].plot(xxs, bbs_down, marker='s', color='b', markersize=5, linewidth=0.7) + + # rectangle = Rectangle((2, rec_c - (rec_h/2)), width=34, height=rec_h, color='black', fill=False) + + for idx, xx in enumerate(xxs): + te = Ellipse((xx, rec_c), width=yys[idx]*ar*scale, height=scale, angle=90, color='lightgreen', fill=True) + bi = Ellipse((xx, rec_c), width=bbs[idx]*ar*scale, height=scale, angle=90, color='b',linewidth=1.8, + fill=False) + + # ax[0].add_patch(rectangle) + ax[0].add_patch(te) + ax[0].add_patch(bi) + + fig.subplots_adjust(hspace=0.1) + plt.setp([aa.get_yticklabels() for aa in fig.axes[:-1]], visible=False) + plt.legend() + if show: + plt.show() + plt.close() + + +def target_error(xx, mm): + return mm * xx + + +def get_distances(clusters): + + clusters_ext = list(clusters) + clusters_ext.insert(0, str(0)) + distances = [] + for idx, _ in enumerate(clusters_ext[:-1]): + clst_0 = float(clusters_ext[idx]) + clst_1 = float(clusters_ext[idx + 1]) + distances.append((clst_1 - clst_0) / 2 + clst_0) + return tuple(distances) + + +def get_values(dic_err, clusters): + + errs = [] + for key in clusters: + errs.append(dic_err[key]) + return errs + + +def get_confidence_points(confidences, distances, errors): + + confidence_points = [] + distance_points = [] + for idx, dd in enumerate(distances): + + conf_perc = confidences[idx] + confidence_points.append(errors[idx] + conf_perc) + confidence_points.append(errors[idx] - conf_perc) + distance_points.append(dd) + distance_points.append(dd) + + return distance_points, confidence_points + + + +# def histogram(self): +# """ +# Visualize histograms to compare error performances of net and baseline +# """ +# # for mode in ['mean', 'std']: +# for mode in ['mean']: +# +# # for phase in ['train', 'val', 'test']: +# for phase in ['test']: +# +# err_geom = [] +# err_net = [] +# counts = [] +# clusters = ('10', '20', '30', '40', '50', '>50', 'all') +# for clst in clusters: +# +# err_geom.append(dic_geom[phase]['error'][clst][mode]) +# counts.append(dic_geom[phase]['error'][clst]['count']) +# err_net.append(dic_net[phase][clst][mode]) +# +# nn = len(clusters) +# +# # TODO Shortcut to print 2 models in a single histogram +# err_net_l1 = [1.1, 1.32, 2.19, 3.29, 4.38, 6.58, 2.21] +# +# ind = np.arange(nn) # the x locations for the groups +# width = 0.30 # the width of the bars +# +# fig, ax = plt.subplots() +# rects1 = ax.bar(ind, err_geom, width, color='b') +# rects2 = ax.bar(ind + width, err_net_l1, width, color='r') +# rects3 = ax.bar(ind + 2 * width, err_net, width, color='g') +# +# # add some text for labels, title and axes ticks +# ax.set_ylabel('Distance error [m]') +# ax.set_title(mode + ' of errors in ' + phase) +# ax.set_xticks(ind + width / 2) +# ax.set_xticklabels(clusters) +# +# ax.legend((rects1[0], rects2[0], rects3[0]), ('Geometrical', 'L1 Loss', 'Laplacian Loss')) +# +# # Attach a text label above each bar displaying number of annotations +# for idx, rect in enumerate(rects1): +# height = rect.get_height() +# count = counts[idx] +# ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, +# '%d' % int(count), ha='center', va='bottom') +# +# plt.show() + + # def error(self): +# """ +# Visualize error as function of the distance on the test set and compare it with target errors based on human +# height analyses. 
We consider: +# Position error in meters due to a height variation of 7 cm (Standard deviation already knowing the sex) +# Position error not knowing the gender (13cm as average difference --> 7.5cm of error to add) +# """ +# phase = 'test' +# xx = np.linspace(0, 60, 100) +# mm_std = 0.04 +# mm_gender = 0.0556 +# clusters = tuple([clst for clst in dic_net[phase] if clst not in ['all', '>50']]) +# # errors_geom = [dic_geom[phase]['error'][clst][mode] for clst in clusters] +# errors_net = [dic_net[phase][clst]['mean'] for clst in clusters] +# confidences = [dic_net[phase][clst]['std'][0] for clst in clusters] +# distances = get_distances(clusters) +# dds, zzs = get_confidence_points(confidences, distances, errors_net) +# +# # Set the plot +# fig = plt.figure() +# ax = plt.subplot(111) +# plt.xlim(0, 50) +# plt.xlabel("Distance [meters]") +# plt.ylabel("Error [meters]") +# plt.title("Error on Z Position Estimate due to height variation") +# +# # Plot the target errors +# yy_std = target_error(xx, mm_std) +# yy_gender = target_error(xx, mm_gender) +# # yy_child = target_error(xx, mm_child) +# # yy_test = target_error(xx, mm_test) +# yy_gps = np.linspace(5., 5., xx.shape[0]) +# plt.plot(xx, yy_std, '--', label="Knowing the gender", color='g') +# plt.plot(xx, yy_gender, '--', label="NOT knowing the gender", color='b') +# plt.plot(xx, yy_gps, '-', label="GPS Error", color='y') +# +# for idx in range(0, len(dds), 2): +# +# plt.plot(dds[idx: idx+2], zzs[idx: idx+2], color='k') +# +# # Plot the geometric and net errors as dots +# _ = ax.plot(distances, errors_net, 'ro', marker='s', label="Network Error (test)", markersize='8') +# # _ = ax.plot(distances, errors_geom, 'ko', marker='o', label="Baseline Error", markersize='5') +# +# ax.legend(loc='upper left') +# plt.show() \ No newline at end of file
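
For reference, a minimal sketch of how the cluster keys in `dic_stats` become the x-axis of the error plots above. The cluster labels and error values below are made up for illustration; `get_distances` and `target_error` mirror the helpers defined in `src/visuals/results.py`.

```
import numpy as np

def target_error(xx, mm):
    # linear task error: grows proportionally with distance, as in results.py
    return mm * xx

def get_distances(clusters):
    # midpoint of each distance cluster, e.g. ('10', '20') -> (5.0, 15.0)
    bounds = ['0'] + list(clusters)
    return tuple((float(hi) - float(lo)) / 2 + float(lo)
                 for lo, hi in zip(bounds[:-1], bounds[1:]))

clusters = ('10', '20', '30', '40')                         # hypothetical cluster keys
mean_errs = {'10': 0.8, '20': 1.3, '30': 2.0, '40': 2.9}    # made-up localization errors [m]

xxs = get_distances(clusters)                               # (5.0, 15.0, 25.0, 35.0)
errs = [mean_errs[clst] for clst in clusters]               # what get_values() pulls from dic_stats
task = target_error(np.array(xxs), 0.0556)                  # generic-adult task error at each distance
print(list(zip(xxs, errs, np.round(task, 2))))
```

The dashed green "Task error" curves plotted in `results.py` and `paper.py` use this same 0.0556 slope.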