From 019b6b0fadabe782b9dc2c77077516b97780a61f Mon Sep 17 00:00:00 2001 From: Lorenzo Bertoni <34957815+bertoni9@users.noreply.github.com> Date: Fri, 28 Jun 2019 18:33:58 +0200 Subject: [PATCH] make utils in torch and remove redundant functions (#3) * Add precision metrics * add mode gt_all and change default threshold * add cyclists * add iou matrix' ' * add cyclists only in training phase * add dropout in model name * small typos * small typo * fix error on uv_boxes * change default mode from gt_ped to gt * 2 decimals * fix name bug * refactor prepare_pif_kps * corrected get_keypoints_batch * add pixel to camera for 3d vectors * preprocessing in torch * return original outputs * Skeleton for post_process * baseline version for post processing * add keypoints torch in post_processing * cleaning misc * add reorder_matches * update preprocess with get_iou_matches * fix indices * remove aa * temp * skeleton kitti_generate * skeleton kitti_generate (2) * refactor file * remove old get_input_data * refactor geometric eval * refactor geometric eval(2) * temp * refactor geometric * change saving order for txts * update pixel to camera * update depth * Fix pixel to camera * add xyz_from_distance * use new function * fix std_ale calculation in eval * remove debug points --- README.md | 2 +- src/eval/generate_kitti.py | 159 +++++++++++++++++++++++++++++ src/eval/geom_baseline.py | 167 +++++++++++------------------- src/eval/kitti_eval.py | 59 ++++++++--- src/eval/run_kitti.py | 186 ---------------------------------- src/features/preprocess_ki.py | 49 ++++----- src/features/preprocess_nu.py | 36 +++---- src/main.py | 17 ++-- src/models/hyp_tuning.py | 6 +- src/models/trainer.py | 22 ++-- src/predict/factory.py | 61 +++++++++-- src/predict/monoloco.py | 113 +++++---------------- src/predict/predict.py | 8 +- src/utils/camera.py | 138 +++++++++++++------------ src/utils/kitti.py | 60 +++-------- src/utils/misc.py | 92 ++++------------- src/utils/normalize.py | 65 ------------ src/utils/pifpaf.py | 100 ++++++++++-------- src/visuals/printer.py | 17 ++-- 19 files changed, 579 insertions(+), 778 deletions(-) create mode 100644 src/eval/generate_kitti.py delete mode 100644 src/eval/run_kitti.py diff --git a/README.md b/README.md index 86aab35..166f8a6 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,7 @@ in txt file with format comparable to other baseline. Then the model performs evaluation. 
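For reference, each row written per detection stores the 2D box and its confidence first, then the camera-frame position and distance, then the distance uncertainties; the intrinsic matrix is appended as the last row of the file. A minimal parsing sketch (illustrative only, with field positions inferred from save_txts and KittiEval.parse_txts in this patch):

def parse_monoloco_row(line):
    """Illustrative helper: split one generated txt row into its fields."""
    vals = [float(x) for x in line.split()]
    box, conf = vals[0:4], vals[4]            # 2D bounding box and pifpaf confidence
    xyz, dd = vals[5:8], vals[8]              # camera-frame position and distance (meters)
    std_ale, std_epi, dd_geom = vals[9:12]    # aleatoric std, epistemic spread, geometric baseline
    return box, conf, xyz, dd, std_ale, std_epi, dd_geom
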
The following graph is obtained running: -`python3 src/main.py eval --dataset kitti --run_kitti --model data/models/monoloco-190513-1437.pkl +`python3 src/main.py eval --dataset kitti --generate --model data/models/monoloco-190513-1437.pkl --dir_ann ` ![kitti_evaluation](docs/results.png) diff --git a/src/eval/generate_kitti.py b/src/eval/generate_kitti.py new file mode 100644 index 0000000..b10be9a --- /dev/null +++ b/src/eval/generate_kitti.py @@ -0,0 +1,159 @@ +"""Run monoloco over all the pifpaf joints of KITTI images +and extract and save the annotations in txt files""" + + +import math +import os +import glob +import json +import shutil +import itertools + +import numpy as np +import torch + +from predict.monoloco import MonoLoco +from eval.geom_baseline import compute_distance +from utils.kitti import get_calibration +from utils.pifpaf import preprocess_pif +from utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera + + +def generate_kitti(model, dir_ann, p_dropout=0.2, n_dropout=0): + + cnt_ann = 0 + cnt_file = 0 + cnt_no_file = 0 + + dir_kk = os.path.join('data', 'kitti', 'calib') + dir_out = os.path.join('data', 'kitti', 'monoloco') + + # Remove the output directory if alreaady exists (avoid residual txt files) + if os.path.exists(dir_out): + shutil.rmtree(dir_out) + os.makedirs(dir_out) + print("Created empty output directory for txt files") + + # Load monoloco + use_cuda = torch.cuda.is_available() + device = torch.device("cuda" if use_cuda else "cpu") + monoloco = MonoLoco(model_path=model, device=device, n_dropout=n_dropout, p_dropout=p_dropout) + + # Run monoloco over the list of images + list_basename = factory_basename(dir_ann) + for basename in list_basename: + path_calib = os.path.join(dir_kk, basename + '.txt') + annotations, kk, tt, _ = factory_file(path_calib, dir_ann, basename) + boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374)) + + if not keypoints: + cnt_no_file += 1 + else: + # Run the network and the geometric baseline + outputs, varss = monoloco.forward(keypoints, kk) + dds_geom = eval_geometric(keypoints, kk, average_y=0.48) + + # Save the file + all_outputs = [outputs.detach().cpu(), varss.detach().cpu(), dds_geom] + all_inputs = [boxes, keypoints] + all_params = [kk, tt] + path_txt = os.path.join(dir_out, basename + '.txt') + save_txts(path_txt, all_inputs, all_outputs, all_params) + + # Update counting + cnt_ann += len(boxes) + cnt_file += 1 + + # Print statistics + print("Saved in {} txt {} annotations. 
Not found {} images" + .format(cnt_file, cnt_ann, cnt_no_file)) + + +def save_txts(path_txt, all_inputs, all_outputs, all_params): + + outputs, varss, dds_geom = all_outputs[:] + uv_boxes, keypoints = all_inputs[:] + kk, tt = all_params[:] + + uv_centers = get_keypoints(keypoints, mode='center') + xy_centers = pixel_to_camera(uv_centers, kk, 1) + zzs = xyz_from_distance(outputs[:, 0:1], xy_centers)[:, 2].tolist() + + with open(path_txt, "w+") as ff: + for idx in range(outputs.shape[0]): + + xx = float(xy_centers[idx][0]) * zzs[idx] + tt[0] + yy = float(xy_centers[idx][1]) * zzs[idx] + tt[1] + zz = zzs[idx] + tt[2] + dd = math.sqrt(xx ** 2 + yy ** 2 + zz ** 2) + cam_0 = [xx, yy, zz, dd] + + for el in uv_boxes[idx][:]: + ff.write("%s " % el) + for el in cam_0: + ff.write("%s " % el) + ff.write("%s " % float(outputs[idx][1])) + ff.write("%s " % float(varss[idx])) + ff.write("%s " % dds_geom[idx]) + ff.write("\n") + + # Save intrinsic matrix in the last row + for kk_el in itertools.chain(*kk): # Flatten a list of lists + ff.write("%f " % kk_el) + ff.write("\n") + + +def factory_basename(dir_ann): + """ Return all the basenames in the annotations folder""" + + list_ann = glob.glob(os.path.join(dir_ann, '*.json')) + list_basename = [os.path.basename(x).split('.')[0] for x in list_ann] + assert list_basename, " Missing json annotations file to create txt files for KITTI datasets" + return list_basename + + +def factory_file(path_calib, dir_ann, basename, ite=0): + """Choose the annotation and the calibration files. Stereo option with ite = 1""" + + stereo_file = True + p_left, p_right = get_calibration(path_calib) + + if ite == 0: + kk, tt = p_left[:] + path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json') + else: + kk, tt = p_right[:] + path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json') + + try: + with open(path_ann, 'r') as f: + annotations = json.load(f) + except FileNotFoundError: + annotations = None + if ite == 1: + stereo_file = False + + return annotations, kk, tt, stereo_file + + +def eval_geometric(keypoints, kk, average_y=0.48): + """ Evaluate geometric distance""" + + dds_geom = [] + + uv_centers = get_keypoints(keypoints, mode='center') + uv_shoulders = get_keypoints(keypoints, mode='shoulder') + uv_hips = get_keypoints(keypoints, mode='hip') + + xy_centers = pixel_to_camera(uv_centers, kk, 1) + xy_shoulders = pixel_to_camera(uv_shoulders, kk, 1) + xy_hips = pixel_to_camera(uv_hips, kk, 1) + + for idx, xy_center in enumerate(xy_centers): + zz = compute_distance(xy_shoulders[idx], xy_hips[idx], average_y) + xyz_center = np.array([xy_center[0], xy_center[1], zz]) + dd_geom = float(np.linalg.norm(xyz_center)) + dds_geom.append(dd_geom) + + return dds_geom + diff --git a/src/eval/geom_baseline.py b/src/eval/geom_baseline.py index d49295e..cd7e6a8 100644 --- a/src/eval/geom_baseline.py +++ b/src/eval/geom_baseline.py @@ -1,85 +1,70 @@ -import glob import json import logging -import os -import numpy as np import math from collections import defaultdict -from utils.camera import pixel_to_camera + +import numpy as np + +from utils.camera import pixel_to_camera, get_keypoints + +AVERAGE_Y = 0.48 +CLUSTERS = ['10', '20', '30', 'all'] +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) -class GeomBaseline: +def geometric_baseline(joints): + """ + List of json files --> 2 lists with mean and std for each segment and the total count of instances - def __init__(self, joints): + For each annotation: + 1. 
From gt boxes calculate the height (deltaY) for the segments head, shoulder, hip, ankle + 2. From mask boxes calculate distance of people using average height of people and real pixel height - # Initialize directories - self.clusters = ['10', '20', '30', '>30', 'all'] - self.average_y = 0.48 - self.joints = joints + For left-right ambiguities we chose always the average of the joints - from utils.misc import calculate_iou - self.calculate_iou = calculate_iou - from utils.nuscenes import get_unique_tokens, split_scenes - self.get_unique_tokens = get_unique_tokens - self.split_scenes = split_scenes + The joints are mapped from 0 to 16 in the following order: + ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder', 'left_elbow', + 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle', + 'right_ankle'] - logging.basicConfig(level=logging.INFO) - self.logger = logging.getLogger(__name__) + """ + cnt_tot = 0 + dic_dist = defaultdict(lambda: defaultdict(list)) - def run(self): - """ - List of json files --> 2 lists with mean and std for each segment and the total count of instances + # Access the joints file + with open(joints, 'r') as ff: + dic_joints = json.load(ff) - For each annotation: - 1. From gt boxes calculate the height (deltaY) for the segments head, shoulder, hip, ankle - 2. From mask boxes calculate distance of people using average height of people and real pixel height + # Calculate distances for all the instances in the joints dictionary + for phase in ['train', 'val']: + cnt = update_distances(dic_joints[phase], dic_dist, phase, AVERAGE_Y) + cnt_tot += cnt - For left-right ambiguities we chose always the average of the joints + # Calculate mean and std of each segment + dic_h_means = calculate_heights(dic_dist['heights'], mode='mean') + dic_h_stds = calculate_heights(dic_dist['heights'], mode='std') + errors = calculate_error(dic_dist['error']) - The joints are mapped from 0 to 16 in the following order: - ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder', 'left_elbow', - 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle', - 'right_ankle'] - - """ - cnt_tot = 0 - dic_dist = defaultdict(lambda: defaultdict(list)) - - # Access the joints file - with open(self.joints, 'r') as ff: - dic_joints = json.load(ff) - - # Calculate distances for all the segments - for phase in ['train', 'val']: - cnt = update_distances(dic_joints[phase], dic_dist, phase, self.average_y) - cnt_tot += cnt - - dic_h_means = calculate_heights(dic_dist['heights'], mode='mean') - dic_h_stds = calculate_heights(dic_dist['heights'], mode='std') - - self.logger.info("Computed distance of {} annotations".format(cnt_tot)) - - for key in dic_h_means: - self.logger.info("Average height of segment {} is {:.2f} with a std of {:.2f}". - format(key, dic_h_means[key], dic_h_stds[key])) - - errors = calculate_error(dic_dist['error']) - - for clst in self.clusters: - self.logger.info("Average distance over the val set for clst {}: {:.2f}".format(clst, errors[clst])) - - self.logger.info("Joints used: {}".format(self.joints)) + # Show results + logger.info("Computed distance of {} annotations".format(cnt_tot)) + for key in dic_h_means: + logger.info("Average height of segment {} is {:.2f} with a std of {:.2f}". 
+ format(key, dic_h_means[key], dic_h_stds[key])) + for clst in CLUSTERS: + logger.info("Average error over the val set for clst {}: {:.2f}".format(clst, errors[clst])) + logger.info("Joints used: {}".format(joints)) def update_distances(dic_fin, dic_dist, phase, average_y): # Loop over each annotation in the json file corresponding to the image - cnt = 0 for idx, kps in enumerate(dic_fin['kps']): + # Extract pixel coordinates of head, shoulder, hip, ankle and and save them - dic_uv = extract_pixel_coord(kps) + dic_uv = {mode: get_keypoints(kps, mode) for mode in ['head', 'shoulder', 'hip', 'ankle']} # Convert segments from pixel coordinate to camera coordinate kk = dic_fin['K'][idx] @@ -87,26 +72,21 @@ def update_distances(dic_fin, dic_dist, phase, average_y): # Create a dict with all annotations in meters dic_xyz = {key: pixel_to_camera(dic_uv[key], kk, z_met) for key in dic_uv} + dic_xyz_norm = {key: pixel_to_camera(dic_uv[key], kk, 1) for key in dic_uv} # Compute real height - dy_met = abs(dic_xyz['hip'][1] - dic_xyz['shoulder'][1]) + dy_met = abs(float((dic_xyz['hip'][0][1] - dic_xyz['shoulder'][0][1]))) # Estimate distance for a single annotation - z_met_real, _ = compute_distance_single(dic_uv['shoulder'], dic_uv['hip'], kk, average_y, - mode='real', dy_met=dy_met) - z_met_approx, _ = compute_distance_single(dic_uv['shoulder'], dic_uv['hip'], kk, average_y, - mode='average') + z_met_real = compute_distance(dic_xyz_norm['shoulder'][0], dic_xyz_norm['hip'][0], average_y, + mode='real', dy_met=dy_met) + z_met_approx = compute_distance(dic_xyz_norm['shoulder'][0], dic_xyz_norm['hip'][0], average_y, mode='average') # Compute distance with respect to the center of the 3D bounding box - xyz_met = np.array(dic_fin['boxes_3d'][idx][0:3]) - d_met = np.linalg.norm(xyz_met) d_real = math.sqrt(z_met_real ** 2 + dic_fin['boxes_3d'][idx][0] ** 2 + dic_fin['boxes_3d'][idx][1] ** 2) d_approx = math.sqrt(z_met_approx ** 2 + dic_fin['boxes_3d'][idx][0] ** 2 + dic_fin['boxes_3d'][idx][1] ** 2) - # if abs(d_qmet - d_real) > 1e-1: # "Error in computing distance with real height in pixels" - # aa = 5 - # Update the dictionary with distance and heights metrics dic_dist = update_dic_dist(dic_dist, dic_xyz, d_real, d_approx, phase) cnt += 1 @@ -114,22 +94,18 @@ def update_distances(dic_fin, dic_dist, phase, average_y): return cnt -def compute_distance_single(uv_1, uv_2, kk, average_y, mode='average', dy_met=0): - +def compute_distance(xyz_norm_1, xyz_norm_2, average_y, mode='average', dy_met=0): """ Compute distance Z of a mask annotation (solving a linear system) for 2 possible cases: 1. knowing specific height of the annotation (head-ankle) dy_met 2. using mean height of people (average_y) """ assert mode == 'average' or mode == 'real' - # Trasform into normalized camera coordinates (plane at 1m) - xyz_met_norm_1 = pixel_to_camera(uv_1, kk, 1) - xyz_met_norm_2 = pixel_to_camera(uv_2, kk, 1) - x1 = xyz_met_norm_1[0] - y1 = xyz_met_norm_1[1] - x2 = xyz_met_norm_2[0] - y2 = xyz_met_norm_2[1] + x1 = float(xyz_norm_1[0]) + y1 = float(xyz_norm_1[1]) + x2 = float(xyz_norm_2[0]) + y2 = float(xyz_norm_2[1]) xx = (x1 + x2) / 2 # Choose if solving for provided height or average one. 
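The least-squares system set up in the next hunk has a simple reading: shoulder and hip are assumed to lie at (roughly) the same depth, so their normalized coordinates scale as 1/z and a known torso height in meters pins down z. A back-of-the-envelope sketch of that idea (illustrative only, not the exact solve used in compute_distance):

def approx_depth_from_torso(y_shoulder_norm, y_hip_norm, average_y=0.48):
    # In normalized image coordinates (z=1 plane), y_norm * z is the metric height,
    # so the metric shoulder-hip length directly fixes the depth.
    return average_y / max(abs(y_hip_norm - y_shoulder_norm), 1e-6)
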
@@ -138,9 +114,6 @@ def compute_distance_single(uv_1, uv_2, kk, average_y, mode='average', dy_met=0) else: cc = -dy_met - # if - 3 * average_y <= cc <= -2: - # aa = 5 - # Solving the linear system Ax = b Aa = np.array([[y1, 0, -xx], [0, -y1, 1], @@ -151,26 +124,7 @@ def compute_distance_single(uv_1, uv_2, kk, average_y, mode='average', dy_met=0) xx = np.linalg.lstsq(Aa, bb, rcond=None) z_met = abs(np.float(xx[0][1])) # Abs take into account specularity behind the observer - # Compute the absolute x and y coordinates in meters - xyz_met_1 = xyz_met_norm_1 * z_met - xyz_met_2 = xyz_met_norm_2 * z_met - - return z_met, (xyz_met_1, xyz_met_2) - - -def extract_pixel_coord(kps): - - """Extract uv coordinates from keypoints and save them in a dict """ - # For each level of height (e.g. 5 points in the head), take the average of them - - uv_head = np.array([np.average(kps[0][0:5]), np.average(kps[1][0:5]), 1]) - uv_shoulder = np.array([np.average(kps[0][5:7]), np.average(kps[1][5:7]), 1]) - uv_hip = np.array([np.average(kps[0][11:13]), np.average(kps[1][11:13]), 1]) - uv_ankle = np.array([np.average(kps[0][15:17]), np.average(kps[1][15:17]), 1]) - - dic_uv = {'head': uv_head, 'shoulder': uv_shoulder, 'hip': uv_hip, 'ankle': uv_ankle} - - return dic_uv + return z_met def update_dic_dist(dic_dist, dic_xyz, d_real, d_approx, phase): @@ -178,10 +132,10 @@ def update_dic_dist(dic_dist, dic_xyz, d_real, d_approx, phase): # Update the dict with heights metric if phase == 'train': - dic_dist['heights']['head'].append(np.float(dic_xyz['head'][1])) - dic_dist['heights']['shoulder'].append(np.float(dic_xyz['shoulder'][1])) - dic_dist['heights']['hip'].append(np.float(dic_xyz['hip'][1])) - dic_dist['heights']['ankle'].append(np.float(dic_xyz['ankle'][1])) + dic_dist['heights']['head'].append(float(dic_xyz['head'][0][1])) + dic_dist['heights']['shoulder'].append(float(dic_xyz['shoulder'][0][1])) + dic_dist['heights']['hip'].append(float(dic_xyz['hip'][0][1])) + dic_dist['heights']['ankle'].append(float(dic_xyz['ankle'][0][1])) # Update the dict with distance metrics for the test phase if phase == 'val': @@ -235,11 +189,8 @@ def calculate_error(dic_errors): """ Compute statistics of distances based on the distance """ - errors = {} for clst in dic_errors: - errors[clst] = np.float(np.mean(np.array(dic_errors[clst]))) - return errors diff --git a/src/eval/kitti_eval.py b/src/eval/kitti_eval.py index f2ca67b..ca99c63 100644 --- a/src/eval/kitti_eval.py +++ b/src/eval/kitti_eval.py @@ -6,7 +6,7 @@ import logging from collections import defaultdict import datetime -from utils.misc import get_iou_matches +from utils.misc import get_iou_matches, get_task_error from utils.kitti import check_conditions, get_category, split_training, parse_ground_truth from visuals.results import print_results @@ -117,6 +117,11 @@ class KittiEval: print("\n Number of matched annotations: {:.1f} %".format(self.errors[key]['matched'])) print("-"*100) + print("\n Annotations inside the confidence interval: {:.1f} %" + .format(100 * self.dic_stats['test']['our']['all']['interval'])) + print("precision 1: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_1'])) + print("precision 2: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_2'])) + def printer(self, show): print_results(self.dic_stats, show) @@ -171,15 +176,13 @@ class KittiEval: file_lines = ff.readlines() for line_our in file_lines[:-1]: line_list = [float(x) for x in line_our.split()] + if check_conditions(line_list, thresh=self.dic_thresh_conf[method], 
mode=method): boxes.append(line_list[:4]) - # xyzs.append(line_list[4:7]) - dds.append(line_list[7]) - stds_ale.append(line_list[8]) - stds_epi.append(line_list[9]) + dds.append(line_list[8]) + stds_ale.append(line_list[9]) + stds_epi.append(line_list[10]) dds_geom.append(line_list[11]) - # xy_kps.append(line_list[12:]) - self.dic_cnt[method] += 1 # kk_list = [float(x) for x in file_lines[-1].split()] @@ -238,7 +241,6 @@ class KittiEval: self.dic_cnt['merged'] += 1 def update_errors(self, dd, dd_gt, cat, errors): - """Compute and save errors between a single box and the gt box which match""" diff = abs(dd - dd_gt) @@ -274,26 +276,49 @@ class KittiEval: self.dic_stds[cat]['epi'].append(std_epi) # Number of annotations inside the confidence interval - if dd_gt <= dd: # Particularly dangerous instances + std = std_epi if std_epi > 0 else std_ale # consider aleatoric uncertainty if epistemic is not calculated + if abs(dd - dd_gt) <= std: + self.dic_stds['all']['interval'].append(1) + self.dic_stds[clst]['interval'].append(1) + self.dic_stds[cat]['interval'].append(1) + else: + self.dic_stds['all']['interval'].append(0) + self.dic_stds[clst]['interval'].append(0) + self.dic_stds[cat]['interval'].append(0) + + # Annotations at risk inside the confidence interval + if dd_gt <= dd: self.dic_stds['all']['at_risk'].append(1) self.dic_stds[clst]['at_risk'].append(1) self.dic_stds[cat]['at_risk'].append(1) if abs(dd - dd_gt) <= std_epi: - self.dic_stds['all']['interval'].append(1) - self.dic_stds[clst]['interval'].append(1) - self.dic_stds[cat]['interval'].append(1) - + self.dic_stds['all']['at_risk-interval'].append(1) + self.dic_stds[clst]['at_risk-interval'].append(1) + self.dic_stds[cat]['at_risk-interval'].append(1) else: - self.dic_stds['all']['interval'].append(0) - self.dic_stds[clst]['interval'].append(0) - self.dic_stds[cat]['interval'].append(0) + self.dic_stds['all']['at_risk-interval'].append(0) + self.dic_stds[clst]['at_risk-interval'].append(0) + self.dic_stds[cat]['at_risk-interval'].append(0) else: self.dic_stds['all']['at_risk'].append(0) self.dic_stds[clst]['at_risk'].append(0) self.dic_stds[cat]['at_risk'].append(0) + # Precision of uncertainty + eps = 1e-4 + task_error = get_task_error(dd) + prec_1 = abs(dd - dd_gt) / (std_epi + eps) + + prec_2 = abs(std_epi - task_error) + self.dic_stds['all']['prec_1'].append(prec_1) + self.dic_stds[clst]['prec_1'].append(prec_1) + self.dic_stds[cat]['prec_1'].append(prec_1) + self.dic_stds['all']['prec_2'].append(prec_2) + self.dic_stds[clst]['prec_2'].append(prec_2) + self.dic_stds[cat]['prec_2'].append(prec_2) + def get_statistics(dic_stats, errors, dic_stds, key): """Update statistics of a cluster""" @@ -307,6 +332,8 @@ def get_statistics(dic_stats, errors, dic_stds, key): dic_stats['std_epi'] = sum(dic_stds['epi']) / float(len(dic_stds['epi'])) dic_stats['interval'] = sum(dic_stds['interval']) / float(len(dic_stds['interval'])) dic_stats['at_risk'] = sum(dic_stds['at_risk']) / float(len(dic_stds['at_risk'])) + dic_stats['prec_1'] = sum(dic_stds['prec_1']) / float(len(dic_stds['prec_1'])) + dic_stats['prec_2'] = sum(dic_stds['prec_2']) / float(len(dic_stds['prec_2'])) def add_true_negatives(err, cnt_gt): diff --git a/src/eval/run_kitti.py b/src/eval/run_kitti.py deleted file mode 100644 index e096baf..0000000 --- a/src/eval/run_kitti.py +++ /dev/null @@ -1,186 +0,0 @@ -"""Run monoloco over all the pifpaf joints of KITTI images -and extract and save the annotations in txt files""" - - -import math -import os -import glob -import json -import 
logging - -import numpy as np -import torch - -from models.architectures import LinearModel -from utils.misc import laplace_sampling -from utils.kitti import eval_geometric, get_calibration -from utils.normalize import unnormalize_bi -from utils.pifpaf import get_input_data, preprocess_pif -from utils.camera import get_depth_from_distance - - -class RunKitti: - - logging.basicConfig(level=logging.INFO) - logger = logging.getLogger(__name__) - cnt_ann = 0 - cnt_file = 0 - cnt_no_file = 0 - average_y = 0.48 - n_samples = 100 - - def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout): - - self.dir_ann = dir_ann - self.n_dropout = n_dropout - self.dir_kk = os.path.join('data', 'kitti', 'calib') - self.dir_out = os.path.join('data', 'kitti', 'monoloco') - if not os.path.exists(self.dir_out): - os.makedirs(self.dir_out) - print("Created output directory for txt files") - - self.list_basename = factory_basename(dir_ann) - - # Load the model - input_size = 17 * 2 - use_cuda = torch.cuda.is_available() - self.device = torch.device("cuda" if use_cuda else "cpu") - self.model = LinearModel(input_size=input_size, output_size=2, linear_size=hidden_size, - p_dropout=dropout, num_stage=n_stage) - self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage)) - self.model.eval() # Default is train - self.model.to(self.device) - - def run(self): - - # Run inference - for basename in self.list_basename: - - path_calib = os.path.join(self.dir_kk, basename + '.txt') - - annotations, kk, tt, _ = factory_file(path_calib, self.dir_ann, basename) - - boxes, keypoints = preprocess_pif(annotations) - (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = get_input_data(boxes, keypoints, kk) - - dds_geom, xy_centers = eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48) - - # Update counting - self.cnt_ann += len(boxes) - if not inputs: - self.cnt_no_file += 1 - else: - self.cnt_file += 1 - - # Run the model - inputs = torch.from_numpy(np.array(inputs)).float().to(self.device) - if self.n_dropout > 0: - total_outputs = torch.empty((0, len(uv_boxes))).to(self.device) - self.model.dropout.training = True - for _ in range(self.n_dropout): - outputs = self.model(inputs) - outputs = unnormalize_bi(outputs) - samples = laplace_sampling(outputs, self.n_samples) - total_outputs = torch.cat((total_outputs, samples), 0) - varss = total_outputs.std(0) - - else: - varss = [0]*len(uv_boxes) - - # Don't use dropout for the mean prediction and aleatoric uncertainty - self.model.dropout.training = False - outputs_net = self.model(inputs) - outputs = outputs_net.cpu().detach().numpy() - - list_zzs = get_depth_from_distance(outputs, xy_centers) - all_outputs = [outputs, varss, dds_geom] - all_inputs = [uv_boxes, xy_centers, xy_kps] - all_params = [kk, tt] - - # Save the file - all_outputs.append(list_zzs) - path_txt = os.path.join(self.dir_out, basename + '.txt') - save_txts(path_txt, all_inputs, all_outputs, all_params) - aa = 5 - - # Print statistics - print("Saved in {} txt {} annotations. 
Not found {} images" - .format(self.cnt_file, self.cnt_ann, self.cnt_no_file)) - - -def save_txts(path_txt, all_inputs, all_outputs, all_params): - - outputs, varss, dds_geom, zzs = all_outputs[:] - uv_boxes, xy_centers, xy_kps = all_inputs[:] - kk, tt = all_params[:] - - with open(path_txt, "w+") as ff: - for idx in range(outputs.shape[0]): - xx_1 = float(xy_centers[idx][0]) - yy_1 = float(xy_centers[idx][1]) - xy_kp = xy_kps[idx] - dd = float(outputs[idx][0]) - std_ale = math.exp(float(outputs[idx][1])) * dd - zz = zzs[idx] - xx_cam_0 = xx_1 * zz + tt[0] - yy_cam_0 = yy_1 * zz + tt[1] - zz_cam_0 = zz + tt[2] - dd_cam_0 = math.sqrt(xx_cam_0 ** 2 + yy_cam_0 ** 2 + zz_cam_0 ** 2) - - uv_box = uv_boxes[idx] - - twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3], - xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0, - std_ale, varss[idx], uv_box[4], dds_geom[idx]]] - - keypoints_str = ["%.5f" % vv for vv in xy_kp] - for item in twodecimals: - ff.write("%s " % item) - for item in keypoints_str: - ff.write("%s " % item) - ff.write("\n") - - # Save intrinsic matrix in the last row - kk_list = kk.reshape(-1, ).tolist() - for kk_el in kk_list: - ff.write("%f " % kk_el) - ff.write("\n") - - - -def factory_basename(dir_ann): - """ Return all the basenames in the annotations folder""" - - list_ann = glob.glob(os.path.join(dir_ann, '*.json')) - list_basename = [os.path.basename(x).split('.')[0] for x in list_ann] - assert list_basename, " Missing json annotations file to create txt files for KITTI datasets" - - return list_basename - - -def factory_file(path_calib, dir_ann, basename, ite=0): - """Choose the annotation and the calibration files. Stereo option with ite = 1""" - - stereo_file = True - p_left, p_right = get_calibration(path_calib) - - if ite == 0: - kk, tt = p_left[:] - path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json') - else: - kk, tt = p_right[:] - path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json') - - try: - with open(path_ann, 'r') as f: - annotations = json.load(f) - except FileNotFoundError: - annotations = None - if ite == 1: - stereo_file = False - - return annotations, kk, tt, stereo_file - - - - diff --git a/src/features/preprocess_ki.py b/src/features/preprocess_ki.py index a4afec0..cf42918 100644 --- a/src/features/preprocess_ki.py +++ b/src/features/preprocess_ki.py @@ -7,9 +7,11 @@ import logging from collections import defaultdict import json import datetime +import torch + from utils.kitti import get_calibration, split_training, parse_ground_truth -from utils.pifpaf import get_input_data, preprocess_pif -from utils.misc import get_idx_max, append_cluster +from utils.pifpaf import get_network_inputs, preprocess_pif +from utils.misc import get_iou_matches, append_cluster class PreprocessKitti: @@ -26,10 +28,10 @@ class PreprocessKitti: clst=defaultdict(lambda: defaultdict(list)))} dic_names = defaultdict(lambda: defaultdict(list)) - def __init__(self, dir_ann, iou_thresh=0.3): + def __init__(self, dir_ann, iou_min=0.3): self.dir_ann = dir_ann - self.iou_thresh = iou_thresh + self.iou_min = iou_min self.dir_gt = os.path.join('data', 'kitti', 'gt') self.names_gt = tuple(os.listdir(self.dir_gt)) self.dir_kk = os.path.join('data', 'kitti', 'calib') @@ -70,10 +72,14 @@ class PreprocessKitti: kk = p_left[0] # Iterate over each line of the gt file and save box location and distances - (boxes_gt, boxes_3d, dds_gt, _, _) = parse_ground_truth(path_gt) + if phase == 'train': + (boxes_gt, boxes_3d, dds_gt, _, _) = 
parse_ground_truth(path_gt, mode='gt_all') # Also cyclists + else: + (boxes_gt, boxes_3d, dds_gt, _, _) = parse_ground_truth(path_gt, mode='gt') # only pedestrians + self.dic_names[basename + '.png']['boxes'] = copy.deepcopy(boxes_gt) self.dic_names[basename + '.png']['dds'] = copy.deepcopy(dds_gt) - self.dic_names[basename + '.png']['K'] = copy.deepcopy(kk.tolist()) + self.dic_names[basename + '.png']['K'] = copy.deepcopy(kk) cnt_gt += len(boxes_gt) cnt_files += 1 cnt_files_ped += min(len(boxes_gt), 1) # if no boxes 0 else 1 @@ -82,28 +88,23 @@ class PreprocessKitti: try: with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f: annotations = json.load(f) - boxes, keypoints = preprocess_pif(annotations) - (inputs, _), (uv_kps, uv_boxes, _, _) = get_input_data(boxes, keypoints, kk) + boxes, keypoints = preprocess_pif(annotations, im_size=(1238, 374)) + inputs = get_network_inputs(keypoints, kk).tolist() except FileNotFoundError: - uv_boxes = [] + boxes = [] # Match each set of keypoint with a ground truth - for ii, box in enumerate(uv_boxes): - idx_max, iou_max = get_idx_max(box, boxes_gt) - - if iou_max >= self.iou_thresh: - - self.dic_jo[phase]['kps'].append(uv_kps[ii]) - self.dic_jo[phase]['X'].append(inputs[ii]) - self.dic_jo[phase]['Y'].append([dds_gt[idx_max]]) # Trick to make it (nn,1) - self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_max]) - self.dic_jo[phase]['K'].append(kk.tolist()) - self.dic_jo[phase]['names'].append(name) # One image name for each annotation - append_cluster(self.dic_jo, phase, inputs[ii], dds_gt[idx_max], uv_kps[ii]) - dic_cnt[phase] += 1 - boxes_gt.pop(idx_max) - dds_gt.pop(idx_max) + matches = get_iou_matches(boxes, boxes_gt, self.iou_min) + for (idx, idx_gt) in matches: + self.dic_jo[phase]['kps'].append(keypoints[idx]) + self.dic_jo[phase]['X'].append(inputs[idx]) + self.dic_jo[phase]['Y'].append([dds_gt[idx_gt]]) # Trick to make it (nn,1) + self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt]) + self.dic_jo[phase]['K'].append(kk) + self.dic_jo[phase]['names'].append(name) # One image name for each annotation + append_cluster(self.dic_jo, phase, inputs[idx], dds_gt[idx_gt], keypoints[idx]) + dic_cnt[phase] += 1 with open(self.path_joints, 'w') as file: json.dump(self.dic_jo, file) diff --git a/src/features/preprocess_nu.py b/src/features/preprocess_nu.py index e1a8478..c294c44 100644 --- a/src/features/preprocess_nu.py +++ b/src/features/preprocess_nu.py @@ -12,10 +12,10 @@ import numpy as np from nuscenes.nuscenes import NuScenes from nuscenes.utils import splits -from utils.misc import get_idx_max, append_cluster +from utils.misc import get_iou_matches, append_cluster from utils.nuscenes import select_categories from utils.camera import project_3d -from utils.pifpaf import get_input_data, preprocess_pif +from utils.pifpaf import preprocess_pif, get_network_inputs class PreprocessNuscenes: @@ -90,6 +90,7 @@ class PreprocessNuscenes: sd_token = sample_dic['data'][cam] cnt_sd += 1 path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1) # At least one corner + kk = kk.tolist() # Extract all the annotations of the person boxes_gt = [] @@ -110,7 +111,7 @@ class PreprocessNuscenes: boxes_3d.append(box_3d) self.dic_names[name]['boxes'].append(box) self.dic_names[name]['dds'].append(dd) - self.dic_names[name]['K'] = kk.tolist() + self.dic_names[name]['K'] = kk # Run IoU with pifpaf detections and save path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json') @@ -120,27 +121,22 @@ class PreprocessNuscenes: with 
open(path_pif, 'r') as file: annotations = json.load(file) - boxes, keypoints = preprocess_pif(annotations, im_size=None) - (inputs, _), (uv_kps, uv_boxes, _, _) = get_input_data(boxes, keypoints, kk) + boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900)) - for ii, box in enumerate(uv_boxes): - idx_max, iou_max = get_idx_max(box, boxes_gt) + if keypoints: + inputs = get_network_inputs(keypoints, kk).tolist() - if iou_max > self.iou_min: - - self.dic_jo[phase]['kps'].append(uv_kps[ii]) - self.dic_jo[phase]['X'].append(inputs[ii]) - self.dic_jo[phase]['Y'].append([dds[idx_max]]) # Trick to make it (nn,1) + matches = get_iou_matches(boxes, boxes_gt, self.iou_min) + for (idx, idx_gt) in matches: + self.dic_jo[phase]['kps'].append(keypoints[idx]) + self.dic_jo[phase]['X'].append(inputs[idx]) + self.dic_jo[phase]['Y'].append([dds[idx_gt]]) # Trick to make it (nn,1) self.dic_jo[phase]['names'].append(name) # One image name for each annotation - self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_max]) - self.dic_jo[phase]['K'].append(kk.tolist()) - append_cluster(self.dic_jo, phase, inputs[ii], dds[idx_max], uv_kps[ii]) - boxes_gt.pop(idx_max) - dds.pop(idx_max) - boxes_3d.pop(idx_max) + self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt]) + self.dic_jo[phase]['K'].append(kk) + append_cluster(self.dic_jo, phase, inputs[idx], dds[idx_gt], keypoints[idx]) cnt_ann += 1 - sys.stdout.write('\r' + 'Saved annotations {}' - .format(cnt_ann) + '\t') + sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t') current_token = sample_dic['next'] diff --git a/src/main.py b/src/main.py index a20a070..50c8d81 100644 --- a/src/main.py +++ b/src/main.py @@ -11,8 +11,8 @@ from features.preprocess_nu import PreprocessNuscenes from features.preprocess_ki import PreprocessKitti from predict.predict import predict from models.trainer import Trainer -from eval.run_kitti import RunKitti -from eval.geom_baseline import GeomBaseline +from eval.generate_kitti import generate_kitti +from eval.geom_baseline import geometric_baseline from models.hyp_tuning import HypTuning from eval.kitti_eval import KittiEval @@ -66,6 +66,7 @@ def cli(): predict_parser.add_argument('--predict', help='whether to make prediction', action='store_true') predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=22) predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0) + predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2) predict_parser.add_argument('--combined', help='to print combined images', action='store_true') # Training @@ -88,7 +89,7 @@ def cli(): # Evaluation eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti') eval_parser.add_argument('--geometric', help='to evaluate geometric distance', action='store_true') - eval_parser.add_argument('--run_kitti', help='create txt files for KITTI evaluation', action='store_true') + eval_parser.add_argument('--generate', help='create txt files for KITTI evaluation', action='store_true') eval_parser.add_argument('--dir_ann', help='directory of annotations of 2d joints (for KITTI evaluation') eval_parser.add_argument('--model', help='path of MonoLoco model to load', required=True) eval_parser.add_argument('--joints', help='Json file with input joints to evaluate (for nuScenes evaluation)') @@ -133,14 +134,10 @@ def main(): elif args.command == 'eval': if args.geometric: - geometric_baseline 
= GeomBaseline(args.joints) - geometric_baseline.run() + geometric_baseline(args.joints) - if args.run_kitti: - run_kitti = RunKitti(model=args.model, dir_ann=args.dir_ann, - dropout=args.dropout, hidden_size=args.hidden_size, n_stage=args.n_stage, - n_dropout=args.n_dropout) - run_kitti.run() + if args.generate: + generate_kitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout) if args.dataset == 'kitti': kitti_eval = KittiEval() diff --git a/src/models/hyp_tuning.py b/src/models/hyp_tuning.py index 90e1750..a704ab3 100644 --- a/src/models/hyp_tuning.py +++ b/src/models/hyp_tuning.py @@ -32,11 +32,7 @@ class HypTuning: now = datetime.datetime.now() now_time = now.strftime("%Y%m%d-%H%M")[2:] - - if baseline: - name_out = 'hyp-baseline-' + now_time - else: - name_out = 'hyp-monoloco-' + now_time + name_out = 'hyp-baseline-' if baseline else 'hyp-monoloco-' self.path_log = os.path.join(dir_logs, name_out + now_time) self.path_model = os.path.join(dir_out, name_out + now_time + '.pkl') diff --git a/src/models/trainer.py b/src/models/trainer.py index 786acb9..f2e80e4 100644 --- a/src/models/trainer.py +++ b/src/models/trainer.py @@ -95,7 +95,7 @@ class Trainer: # Select the device and load the data use_cuda = torch.cuda.is_available() - self.device = torch.device("cuda:0" if use_cuda else "cpu") + self.device = torch.device("cuda:1" if use_cuda else "cpu") print('Device: ', self.device) # Set the seed for random initialization @@ -331,24 +331,20 @@ class Trainer: else: mean_bi = torch.mean(outputs[:, 1]).item() - max_bi = torch.max(outputs[:, 1]).item() low_bound_bi = labels >= (outputs[:, 0] - outputs[:, 1]) up_bound_bi = labels <= (outputs[:, 0] + outputs[:, 1]) bools_bi = low_bound_bi & up_bound_bi conf_bi = float(torch.sum(bools_bi)) / float(bools_bi.shape[0]) - if varss[0] == 0: - aa = 5 - - else: - mean_var = torch.mean(varss).item() - max_var = torch.max(varss).item() - - low_bound_var = labels >= (outputs[:, 0] - varss) - up_bound_var = labels <= (outputs[:, 0] + varss) - bools_var = low_bound_var & up_bound_var - conf_var = float(torch.sum(bools_var)) / float(bools_var.shape[0]) + # if varss[0] >= 0: + # mean_var = torch.mean(varss).item() + # max_var = torch.max(varss).item() + # + # low_bound_var = labels >= (outputs[:, 0] - varss) + # up_bound_var = labels <= (outputs[:, 0] + varss) + # bools_var = low_bound_var & up_bound_var + # conf_var = float(torch.sum(bools_var)) / float(bools_var.shape[0]) dic_err['mean'] += mean_mu * (outputs.size(0) / size_eval) dic_err['bi'] += mean_bi * (outputs.size(0) / size_eval) diff --git a/src/predict/factory.py b/src/predict/factory.py index c2fc94b..2d2c4b5 100644 --- a/src/predict/factory.py +++ b/src/predict/factory.py @@ -1,10 +1,11 @@ import json import os -from visuals.printer import Printer +from collections import defaultdict from openpifpaf import show - -from PIL import Image +from visuals.printer import Printer +from utils.misc import get_iou_matches, reorder_matches +from utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance def factory_for_gt(im_size, name=None, path_gt=None): @@ -13,7 +14,7 @@ def factory_for_gt(im_size, name=None, path_gt=None): try: with open(path_gt, 'r') as f: dic_names = json.load(f) - print('-' * 120 + "\nMonoloco: Ground-truth file opened\n") + print('-' * 120 + "\nMonoloco: Ground-truth file opened") except FileNotFoundError: print('-' * 120 + "\nMonoloco: ground-truth file not found\n") dic_names = {} @@ -45,7 +46,6 @@ def factory_outputs(args, images_outputs, 
output_path, pifpaf_outputs, monoloco_ # Save json file if 'pifpaf' in args.networks: - keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:] # Visualizer @@ -74,13 +74,16 @@ def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_ skeleton_painter.keypoints(ax, keypoint_sets, scores=scores) if 'monoloco' in args.networks: + + dic_out = monoloco_post_process(monoloco_outputs) + if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])): epistemic = False if args.n_dropout > 0: epistemic = True - printer = Printer(images_outputs[1], output_path, monoloco_outputs, kk, output_types=args.output_types, + printer = Printer(images_outputs[1], output_path, dic_out, kk, output_types=args.output_types, show=args.show, z_max=args.z_max, epistemic=epistemic) printer.print() @@ -89,3 +92,49 @@ def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_ json.dump(monoloco_outputs, ff) +def monoloco_post_process(monoloco_outputs, iou_min=0.25): + """Post process monoloco to output final dictionary with all information for visualizations""" + + dic_out = defaultdict(list) + outputs, varss, boxes, keypoints, kk, dic_gt = monoloco_outputs[:] + if dic_gt: + boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds'] + matches = get_iou_matches(boxes, boxes_gt, thresh=iou_min) + else: + matches = [(idx, idx_gt) for idx, idx_gt in range(len(boxes))] # Replicate boxes + + matches = reorder_matches(matches, boxes, mode='left_right') + uv_shoulders = get_keypoints(keypoints, mode='shoulder') + uv_centers = get_keypoints(keypoints, mode='center') + xy_centers = pixel_to_camera(uv_centers, kk, 1) + + # Match with ground truth if available + for idx, idx_gt in matches: + dd_pred = float(outputs[idx][0]) + ale = float(outputs[idx][1]) + var_y = float(varss[idx]) + dd_real = dds_gt[idx_gt] if dic_gt else dd_pred + + kps = keypoints[idx] + box = boxes[idx] + uu_s, vv_s = uv_shoulders.tolist()[idx][0:2] + uu_c, vv_c = uv_centers.tolist()[idx][0:2] + uv_shoulder = [round(uu_s), round(vv_s)] + uv_center = [round(uu_c), round(vv_c)] + xyz_real = xyz_from_distance(dd_real, xy_centers[idx]) + xyz_pred = xyz_from_distance(dd_pred, xy_centers[idx]) + dic_out['boxes'].append(box) + dic_out['dds_real'].append(dd_real) + dic_out['dds_pred'].append(dd_pred) + dic_out['stds_ale'].append(ale) + dic_out['stds_epi'].append(var_y) + dic_out['xyz_real'].append(xyz_real.squeeze().tolist()) + dic_out['xyz_pred'].append(xyz_pred.squeeze().tolist()) + dic_out['uv_kps'].append(kps) + dic_out['uv_centers'].append(uv_center) + dic_out['uv_shoulders'].append(uv_shoulder) + + return dic_out + + + diff --git a/src/predict/monoloco.py b/src/predict/monoloco.py index 4b7b66e..148405d 100644 --- a/src/predict/monoloco.py +++ b/src/predict/monoloco.py @@ -3,18 +3,14 @@ Monoloco predictor. 
It receives pifpaf joints and outputs distances """ -from collections import defaultdict import logging -import time -import numpy as np import torch from models.architectures import LinearModel -from utils.camera import get_depth -from utils.misc import laplace_sampling, get_idx_max +from utils.misc import laplace_sampling from utils.normalize import unnormalize_bi -from utils.pifpaf import get_input_data +from utils.pifpaf import get_network_inputs class MonoLoco: @@ -24,97 +20,44 @@ class MonoLoco: OUTPUT_SIZE = 2 INPUT_SIZE = 17 * 2 LINEAR_SIZE = 256 - IOU_MIN = 0.25 N_SAMPLES = 100 - def __init__(self, model, device, n_dropout=0): + def __init__(self, model_path, device, n_dropout=0, p_dropout=0.2): self.device = device self.n_dropout = n_dropout - if self.n_dropout > 0: - self.epistemic = True - else: - self.epistemic = False + self.epistemic = True if self.n_dropout > 0 else False # load the model parameters - self.model = LinearModel(input_size=self.INPUT_SIZE, output_size=self.OUTPUT_SIZE, linear_size=self.LINEAR_SIZE) - self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage)) + self.model = LinearModel(p_dropout=p_dropout, + input_size=self.INPUT_SIZE, output_size=self.OUTPUT_SIZE, linear_size=self.LINEAR_SIZE, + ) + self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage)) self.model.eval() # Default is train self.model.to(self.device) - def forward(self, boxes, keypoints, kk, dic_gt=None): + def forward(self, keypoints, kk): + """forward pass of monoloco network""" + if not keypoints: + return None - (inputs_norm, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = \ - get_input_data(boxes, keypoints, kk, left_to_right=True) + with torch.no_grad(): + inputs = get_network_inputs(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device)) + if self.n_dropout > 0: + self.model.dropout.training = True # Manually reactivate dropout in eval + total_outputs = torch.empty((0, inputs.size()[0])).to(self.device) - # Conversion into torch tensor - if inputs_norm: - with torch.no_grad(): - inputs = torch.from_numpy(np.array(inputs_norm)).float() - inputs = inputs.to(self.device) - # self.model.to("cpu") - start = time.time() - # Manually reactivate dropout in eval - self.model.dropout.training = True - total_outputs = torch.empty((0, len(xy_kps))).to(self.device) - - if self.n_dropout > 0: - for _ in range(self.n_dropout): - outputs = self.model(inputs) - outputs = unnormalize_bi(outputs) - samples = laplace_sampling(outputs, self.N_SAMPLES) - total_outputs = torch.cat((total_outputs, samples), 0) - varss = total_outputs.std(0) - else: - varss = [0] * len(inputs_norm) - - # # Don't use dropout for the mean prediction - start_single = time.time() + for _ in range(self.n_dropout): + outputs = self.model(inputs) + outputs = unnormalize_bi(outputs) + samples = laplace_sampling(outputs, self.N_SAMPLES) + total_outputs = torch.cat((total_outputs, samples), 0) + varss = total_outputs.std(0) self.model.dropout.training = False - outputs = self.model(inputs) - outputs = unnormalize_bi(outputs) - end = time.time() - print("Total Forward pass time with {} forward passes = {:.2f} ms" - .format(self.n_dropout, (end-start) * 1000)) - print("Single forward pass time = {:.2f} ms".format((end - start_single) * 1000)) - - # Create output files - dic_out = defaultdict(list) - if dic_gt: - boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds'] - - for idx, box in enumerate(uv_boxes): - dd_pred = float(outputs[idx][0]) - ale = 
float(outputs[idx][1]) - var_y = float(varss[idx]) - - # Find the corresponding ground truth if available - if dic_gt: - idx_max, iou_max = get_idx_max(box, boxes_gt) - if iou_max > self.IOU_MIN: - dd_real = dds_gt[idx_max] - boxes_gt.pop(idx_max) - dds_gt.pop(idx_max) - # In case of no matching - else: - dd_real = 0 - # In case of no ground truth else: - dd_real = dd_pred + varss = torch.zeros(inputs.size()[0]) - uv_center = uv_centers[idx] - xyz_real = get_depth(uv_center, kk, dd_real) - xyz_pred = get_depth(uv_center, kk, dd_pred) - dic_out['boxes'].append(box) - dic_out['dds_real'].append(dd_real) - dic_out['dds_pred'].append(dd_pred) - dic_out['stds_ale'].append(ale) - dic_out['stds_epi'].append(var_y) - dic_out['xyz_real'].append(xyz_real) - dic_out['xyz_pred'].append(xyz_pred) - dic_out['xy_kps'].append(xy_kps[idx]) - dic_out['uv_kps'].append(uv_kps[idx]) - dic_out['uv_centers'].append(uv_center) - dic_out['uv_shoulders'].append(uv_shoulders[idx]) - - return dic_out + # Don't use dropout for the mean prediction + outputs = self.model(inputs) + outputs = unnormalize_bi(outputs) + return outputs, varss diff --git a/src/predict/predict.py b/src/predict/predict.py index 174b97b..56f0ecb 100644 --- a/src/predict/predict.py +++ b/src/predict/predict.py @@ -86,7 +86,7 @@ def predict(args): processor = decoder.factory_from_args(args, model_pifpaf) # load monoloco - monoloco = MonoLoco(model=args.model, device=args.device, n_dropout=args.n_dropout) + monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout) # data data = ImageList(args.images, scale=args.scale) @@ -146,11 +146,13 @@ def predict(args): im_name = os.path.basename(image_path) - kk, _ = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt) + kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt) # Preprocess pifpaf outputs and run monoloco boxes, keypoints = preprocess_pif(pifpaf_out, im_size) - monoloco_outputs = monoloco.forward(boxes, keypoints, kk) + outputs, varss = monoloco.forward(keypoints, kk) + monoloco_outputs = [outputs, varss, boxes, keypoints, kk, dic_gt] + else: monoloco_outputs = None kk = None diff --git a/src/utils/camera.py b/src/utils/camera.py index 24c2891..eaab69a 100644 --- a/src/utils/camera.py +++ b/src/utils/camera.py @@ -1,17 +1,29 @@ import numpy as np import math +import torch +import torch.nn.functional as F -def pixel_to_camera(uv1, kk, z_met): +def pixel_to_camera(uv_tensor, kk, z_met): """ - (3,) array --> (3,) array - Convert a point in pixel coordinate to absolute camera coordinates + Convert a tensor in pixel coordinate to absolute camera coordinates + It accepts lists or tensors of (m, 2) or (m, x, 2) or (m, 2, x) + where x is the number of keypoints """ + if type(uv_tensor) == list: + uv_tensor = torch.tensor(uv_tensor) + if type(kk) == list: + kk = torch.tensor(kk) + if uv_tensor.size()[-1] != 2: + uv_tensor = uv_tensor.permute(0, 2, 1) # permute to have 2 as last dim to be padded + assert uv_tensor.size()[-1] == 2, "Tensor size not recognized" + uv_padded = F.pad(uv_tensor, pad=(0, 1), mode="constant", value=1) # pad only last-dim below with value 1 - kk_1 = np.linalg.inv(kk) - xyz_met_norm = np.dot(kk_1, uv1) + kk_1 = torch.inverse(kk) + xyz_met_norm = torch.matmul(uv_padded, kk_1.t()) # More general than torch.mm xyz_met = xyz_met_norm * z_met + return xyz_met @@ -28,9 +40,7 @@ def project_3d(box_obj, kk): """ Project a 3D bounding box into the image plane using the central corners """ - box_2d = [] - # 
Obtain the 3d points of the box xc, yc, zc = box_obj.center ww, ll, hh, = box_obj.wlh @@ -55,59 +65,39 @@ def project_3d(box_obj, kk): return box_2d -def preprocess_single(kps, kk): - - """ Preprocess input of a single annotations - Input_kps = list of 4 elements with 0=x, 1=y, 2= confidence, 3 = ? in pixels - Output_kps = [x0, y0, x1,...x15, y15] in meters normalized (z=1) and zero-centered using the center of the box +def get_keypoints(keypoints, mode): """ + Extract center, shoulder or hip points of a keypoint + Input --> list or torch.tensor [(m, 3, 17) or (3, 17)] + Output --> torch.tensor [(m, 2)] + """ + if type(keypoints) == list: + keypoints = torch.tensor(keypoints) + if len(keypoints.size()) == 2: # add batch dim + keypoints = keypoints.unsqueeze(0) - kps_uv = [] - kps_0c = [] - kps_orig = [] - - # Create center of the bounding box using min max of the keypoints - uu_c, vv_c = get_keypoints(kps[0], kps[1], mode='center') - uv_center = np.array([uu_c, vv_c, 1]) - - # Create a list of single arrays of (u, v, 1) - for idx, _ in enumerate(kps[0]): - uv_kp = np.array([kps[0][idx], kps[1][idx], 1]) - kps_uv.append(uv_kp) - - # Projection in normalized image coordinates and zero-center with the center of the bounding box - xy1_center = pixel_to_camera(uv_center, kk, 1) * 10 - for idx, kp in enumerate(kps_uv): - kp_proj = pixel_to_camera(kp, kk, 1) * 10 - kp_proj_0c = kp_proj - xy1_center - kps_0c.append(float(kp_proj_0c[0])) - kps_0c.append(float(kp_proj_0c[1])) - - kp_orig = pixel_to_camera(kp, kk, 1) - kps_orig.append(float(kp_orig[0])) - kps_orig.append(float(kp_orig[1])) - - return kps_0c, kps_orig - - -def get_keypoints(kps_0, kps_1, mode): - """Get the center of 2 lists""" - - assert mode == 'center' or mode == 'shoulder' or mode == 'hip' + assert len(keypoints.size()) == 3 and keypoints.size()[1] == 3, "tensor dimensions not recognized" + assert mode in ['center', 'head', 'shoulder', 'hip' , 'ankle'] + kps_in = keypoints[:, 0:2, :] # (m, 2, 17) if mode == 'center': - uu = (max(kps_0) - min(kps_0)) / 2 + min(kps_0) - vv = (max(kps_1) - min(kps_1)) / 2 + min(kps_1) + kps_max, _ = kps_in.max(2) # returns value, indices + kps_min, _ = kps_in.min(2) + kps_out = (kps_max - kps_min) / 2 + kps_min # (m, 2) as keepdims is False + + elif mode == 'head': + kps_out = kps_in[:, :, 0:5].mean(2) elif mode == 'shoulder': - uu = float(np.average(kps_0[5:7])) - vv = float(np.average(kps_1[5:7])) + kps_out = kps_in[:, :, 5:7].mean(2) elif mode == 'hip': - uu = float(np.average(kps_0[11:13])) - vv = float(np.average(kps_1[11:13])) + kps_out = kps_in[:, :, 11:13].mean(2) - return uu, vv + elif mode == 'ankle': + kps_out = kps_in[:, :, 15:17].mean(2) + + return kps_out # (m, 2) def transform_kp(kps, tr_mode): @@ -118,7 +108,7 @@ def transform_kp(kps, tr_mode): or tr_mode == 'shoulder' or tr_mode == 'knee' or tr_mode == 'upside' or tr_mode == 'falling' \ or tr_mode == 'random' - uu_c, vv_c = get_keypoints(kps[0], kps[1], mode='center') + uu_c, vv_c = get_keypoints(kps, mode='center') if tr_mode == "None": return kps @@ -180,25 +170,33 @@ def transform_kp(kps, tr_mode): return [uus, vvs, kps[2], []] -def get_depth(uv_center, kk, dd): +def xyz_from_distance(distances, xy_centers): + """ + From distances and normalized image coordinates (z=1), extract the real world position xyz + distances --> tensor (m,1) or (m) or float + xy_centers --> tensor(m,3) or (3) + """ - if len(uv_center) == 2: - uv_center.extend([1]) - uv_center_np = np.array(uv_center) - xyz_norm = pixel_to_camera(uv_center, kk, 1) - zz = 
dd / math.sqrt(1 + xyz_norm[0] ** 2 + xyz_norm[1] ** 2) + if type(distances) == float: + distances = torch.tensor(distances).unsqueeze(0) + if len(distances.size()) == 1: + distances = torch.tensor(distances).unsqueeze(1) + if len(xy_centers.size()) == 1: + xy_centers = xy_centers.unsqueeze(0) - xyz = pixel_to_camera(uv_center_np, kk, zz).tolist() - return xyz + assert xy_centers.size()[-1] == 3 and distances.size()[-1] == 1, "Size of tensor not recognized" + + return xy_centers * distances / torch.sqrt(1 + xy_centers[:, 0:1].pow(2) + xy_centers[:, 1:2].pow(2)) -def get_depth_from_distance(outputs, xy_centers): - - list_zzs = [] - for idx, _ in enumerate(outputs): - dd = float(outputs[idx][0]) - xx_1 = float(xy_centers[idx][0]) - yy_1 = float(xy_centers[idx][1]) - zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2) - list_zzs.append(zz) - return list_zzs +def pixel_to_camera_old(uv1, kk, z_met): + """ + (3,) array --> (3,) array + Convert a point in pixel coordinate to absolute camera coordinates + """ + if len(uv1) == 2: + uv1.append(1) + kk_1 = np.linalg.inv(kk) + xyz_met_norm = np.dot(kk_1, uv1) + xyz_met = xyz_met_norm * z_met + return xyz_met diff --git a/src/utils/kitti.py b/src/utils/kitti.py index 7f9304c..40d3420 100644 --- a/src/utils/kitti.py +++ b/src/utils/kitti.py @@ -1,37 +1,7 @@ import numpy as np -import copy import math -from utils.camera import pixel_to_camera, get_keypoints -from eval.geom_baseline import compute_distance_single - - -def eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48): - """ - Evaluate geometric distance - """ - xy_centers = [] - dds_geom = [] - for idx, _ in enumerate(uv_centers): - uv_center = copy.deepcopy(uv_centers[idx]) - uv_center.append(1) - uv_shoulder = copy.deepcopy(uv_shoulders[idx]) - uv_shoulder.append(1) - uv_kp = uv_kps[idx] - xy_center = pixel_to_camera(uv_center, kk, 1) - xy_centers.append(xy_center.tolist()) - - uu_2, vv_2 = get_keypoints(uv_kp[0], uv_kp[1], mode='hip') - uv_hip = [uu_2, vv_2, 1] - - zz, _ = compute_distance_single(uv_shoulder, uv_hip, kk, average_y) - xyz_center = np.array([xy_center[0], xy_center[1], zz]) - dd_geom = float(np.linalg.norm(xyz_center)) - dds_geom.append(dd_geom) - - return dds_geom, xy_centers - def get_calibration(path_txt): """Read calibration parameters from txt file: @@ -71,17 +41,17 @@ def get_calibration(path_txt): def get_translation(pp): - """Separate intrinsic matrix from translation""" + """Separate intrinsic matrix from translation and convert in lists""" kk = pp[:, :-1] f_x = kk[0, 0] f_y = kk[1, 1] x0, y0 = kk[2, 0:2] aa, bb, t3 = pp[0:3, 3] - t1 = (aa - x0*t3) / f_x - t2 = (bb - y0*t3) / f_y - tt = np.array([t1, t2, t3]).reshape(3, 1) - return kk, tt + t1 = float((aa - x0*t3) / f_x) + t2 = float((bb - y0*t3) / f_y) + tt = [t1, t2, float(t3)] + return kk.tolist(), tt def get_simplified_calibration(path_txt): @@ -99,12 +69,11 @@ def get_simplified_calibration(path_txt): raise ValueError('Matrix K_02 not found in the file') -def check_conditions(line, mode, thresh=0.5): - +def check_conditions(line, mode, thresh=0.3): """Check conditions of our or m3d txt file""" check = False - assert mode == 'gt' or mode == 'm3d' or mode == '3dop' or mode == 'our', "Type not recognized" + assert mode in ['gt', 'gt_all', 'm3d', '3dop','our'], "Mode %r not recognized" % mode if mode == 'm3d' or mode == '3dop': conf = line.split()[15] @@ -116,8 +85,13 @@ def check_conditions(line, mode, thresh=0.5): if line[:10] == 'Pedestrian': check = True + # Consider also person sitting and cyclists 
categories + elif mode == 'gt_all': + if line[:10] == 'Pedestrian' or line[:10] == 'Person_sit' or line[:7] == 'Cyclist': + check = True + elif mode == 'our': - if line[10] >= thresh: + if line[4] >= thresh: check = True return check @@ -126,7 +100,6 @@ def check_conditions(line, mode, thresh=0.5): def get_category(box, trunc, occ): hh = box[3] - box[1] - if hh >= 40 and trunc <= 0.15 and occ <= 0: cat = 'easy' elif trunc <= 0.3 and occ <= 1 and hh >= 25: @@ -135,7 +108,6 @@ def get_category(box, trunc, occ): cat = 'hard' else: cat = 'excluded' - return cat @@ -158,7 +130,7 @@ def split_training(names_gt, path_train, path_val): return set_train, set_val -def parse_ground_truth(path_gt): +def parse_ground_truth(path_gt, mode='gt'): """Parse KITTI ground truth files""" boxes_gt = [] dds_gt = [] @@ -168,7 +140,7 @@ def parse_ground_truth(path_gt): with open(path_gt, "r") as f_gt: for line_gt in f_gt: - if check_conditions(line_gt, mode='gt'): + if check_conditions(line_gt, mode=mode): truncs_gt.append(float(line_gt.split()[1])) occs_gt.append(int(line_gt.split()[2])) boxes_gt.append([float(x) for x in line_gt.split()[4:8]]) @@ -177,4 +149,4 @@ def parse_ground_truth(path_gt): boxes_3d.append(loc_gt + wlh) dds_gt.append(math.sqrt(loc_gt[0] ** 2 + loc_gt[1] ** 2 + loc_gt[2] ** 2)) - return (boxes_gt, boxes_3d, dds_gt, truncs_gt, occs_gt) + return boxes_gt, boxes_3d, dds_gt, truncs_gt, occs_gt diff --git a/src/utils/misc.py b/src/utils/misc.py index aa1a3fa..02fed23 100644 --- a/src/utils/misc.py +++ b/src/utils/misc.py @@ -3,7 +3,7 @@ import numpy as np import torch import time import logging -# from shapely.geometry import box as Sbox + def set_logger(log_path): """Set the logger to log info in terminal and file `log_path`. @@ -70,7 +70,6 @@ def get_iou_matrix(boxes, boxes_gt): Dim: (boxes, boxes_gt) """ iou_matrix = np.zeros((len(boxes), len(boxes_gt))) - for idx, box in enumerate(boxes): for idx_gt, box_gt in enumerate(boxes_gt): iou_matrix[idx, idx_gt] = calculate_iou(box, box_gt) @@ -96,36 +95,21 @@ def get_iou_matches(boxes, boxes_gt, thresh): return matches -def reparametrize_box3d(box): - """Reparametrized 3D box in the XZ plane and add the height""" +def reorder_matches(matches, boxes, mode='left_rigth'): + """ + Reorder a list of (idx, idx_gt) matches based on position of the detections in the image + ordered_boxes = (5, 6, 7, 0, 1, 4, 2, 4) + matches = [(0, x), (2,x), (4,x), (3,x), (5,x)] + Output --> [(5, x), (0, x), (3, x), (2, x), (5, x)] + """ - hh, ww, ll = box[0:3] - x_c, y_c, z_c = box[3:6] + assert mode == 'left_right' - x1 = x_c - ll/2 - z1 = z_c - ww/2 - x2 = x_c + ll/2 - z2 = z_c + ww / 2 + # Order the boxes based on the left-right position in the image and + ordered_boxes = np.argsort([box[0] for box in boxes]) # indices of boxes ordered from left to right + matches_left = [idx for (idx, _) in matches] - return [x1, z1, x2, z2, hh] - - -# def calculate_iou3d(box3d_1, box3d_2): -# """3D intersection over union. 
Boxes are parametrized as x1, z1, x2, z2, hh -# We compute 2d iou in the birds plane and then add a factor for height differences (0-1)""" -# -# poly1 = Sbox(box3d_1[0], box3d_1[1], box3d_1[2], box3d_1[3]) -# poly2 = Sbox(box3d_2[0], box3d_2[1], box3d_2[2], box3d_2[3]) -# -# inter_2d = poly1.intersection(poly2).area -# union_2d = poly1.area + poly2.area - inter_2d -# -# # height_factor = 1 - abs(box3d_1[4] - box3d_2[4]) / max(box3d_1[4], box3d_2[4]) -# -# # -# iou_3d = inter_2d / union_2d # * height_factor -# -# return iou_3d + return [matches[matches_left.index(idx_boxes)] for idx_boxes in ordered_boxes if idx_boxes in matches_left] def laplace_sampling(outputs, n_samples): @@ -135,7 +119,6 @@ def laplace_sampling(outputs, n_samples): mu = outputs[:, 0] bi = torch.abs(outputs[:, 1]) - # Analytical # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0]) # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu)) @@ -148,30 +131,13 @@ def laplace_sampling(outputs, n_samples): device = torch.device(type="cuda", index=get_device) else: device = torch.device("cpu") - t1 = time.time() xxs = torch.empty((0, mu.shape[0])).to(device) - t2 = time.time() - laplace = torch.distributions.Laplace(mu, bi) - t3 = time.time() for ii in range(1): xx = laplace.sample((n_samples,)) - t4a = time.time() xxs = torch.cat((xxs, xx.view(n_samples, -1)), 0) - t4 = time.time() - # time_tot = t4 - t0 - # time_1 = t1 - t0 - # time_2 = t2 - t1 - # time_3 = t3 - t2 - # time_4a = t4a - t3 - # time_4 = t4 - t3 - # print("Time 1: {:.1f}%".format(time_1 / time_tot * 100)) - # print("Time 2: {:.1f}%".format(time_2 / time_tot * 100)) - # print("Time 3: {:.1f}%".format(time_3 / time_tot * 100)) - # print("Time 4a: {:.1f}%".format(time_4a / time_tot * 100)) - # print("Time 4: {:.1f}%".format(time_4 / time_tot * 100)) return xxs @@ -191,48 +157,30 @@ def append_cluster(dic_jo, phase, xx, dd, kps): """Append the annotation based on its distance""" - # if dd <= 6: - # dic_jo[phase]['clst']['6']['kps'].append(kps) - # dic_jo[phase]['clst']['6']['X'].append(xx) - # dic_jo[phase]['clst']['6']['Y'].append([dd]) # Trick to make it (nn,1) instead of (nn, ) - if dd <= 10: dic_jo[phase]['clst']['10']['kps'].append(kps) dic_jo[phase]['clst']['10']['X'].append(xx) dic_jo[phase]['clst']['10']['Y'].append([dd]) - # elif dd <= 15: - # dic_jo[phase]['clst']['15']['kps'].append(kps) - # dic_jo[phase]['clst']['15']['X'].append(xx) - # dic_jo[phase]['clst']['15']['Y'].append([dd]) - elif dd <= 20: dic_jo[phase]['clst']['20']['kps'].append(kps) dic_jo[phase]['clst']['20']['X'].append(xx) dic_jo[phase]['clst']['20']['Y'].append([dd]) - # elif dd <= 25: - # dic_jo[phase]['clst']['25']['kps'].append(kps) - # dic_jo[phase]['clst']['25']['X'].append(xx) - # dic_jo[phase]['clst']['25']['Y'].append([dd]) - elif dd <= 30: dic_jo[phase]['clst']['30']['kps'].append(kps) dic_jo[phase]['clst']['30']['X'].append(xx) dic_jo[phase]['clst']['30']['Y'].append([dd]) - # elif dd <= 40: - # dic_jo[phase]['clst']['40']['kps'].append(kps) - # dic_jo[phase]['clst']['40']['X'].append(xx) - # dic_jo[phase]['clst']['40']['Y'].append([dd]) - # - # elif dd <= 50: - # dic_jo[phase]['clst']['50']['kps'].append(kps) - # dic_jo[phase]['clst']['50']['X'].append(xx) - # dic_jo[phase]['clst']['50']['Y'].append([dd]) - else: dic_jo[phase]['clst']['>30']['kps'].append(kps) dic_jo[phase]['clst']['>30']['X'].append(xx) dic_jo[phase]['clst']['>30']['Y'].append([dd]) + +def get_task_error(dd): + """Get target error not knowing the gender""" + mm_gender = 0.0556 + return mm_gender * 
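
# Illustrative standalone sketch (not part of the patched files): Monte Carlo
# sampling from a Laplace distribution parametrized by (mu, b), the simplified core
# of the laplace_sampling utility above (device handling omitted). The outputs
# tensor is assumed to hold one [mu, b] pair per detection.
import torch

def laplace_sampling_demo(outputs, n_samples):
    mu = outputs[:, 0]
    bi = torch.abs(outputs[:, 1])                 # the scale must be positive
    laplace = torch.distributions.Laplace(mu, bi)
    return laplace.sample((n_samples,))           # (n_samples, m)

outputs = torch.tensor([[10.0, 0.5], [25.0, 2.0]])
samples = laplace_sampling_demo(outputs, 1000)
print(samples.mean(dim=0))                        # approximately [10, 25]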
dd + + diff --git a/src/utils/normalize.py b/src/utils/normalize.py index 3f0aca9..775d978 100644 --- a/src/utils/normalize.py +++ b/src/utils/normalize.py @@ -40,68 +40,3 @@ def unnormalize_bi(outputs): outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0] return outputs - - -# def normalize_arrays_jo(dic_jo): -# """Normalize according to the mean and std of each keypoint in the training dataset -# PS normalization of training also for test and val""" -# -# # Normalize -# phase = 'train' -# kps_orig_tr = np.array(dic_jo[phase]['X']) -# # dd_orig_tr = np.array(dic_jo[phase]['Y']).reshape(-1, 1) -# kps_mean = np.mean(kps_orig_tr, axis=0) -# plt.hist(kps_orig_tr, bins=100) -# plt.show() -# kps_std = np.std(kps_orig_tr, axis=0) -# # dd_mean = np.mean(dd_orig_tr, axis=0) -# # dd_std = np.std(dd_orig_tr, axis=0) -# -# for phase in dic_jo: -# -# # Compute the normalized arrays -# kps_orig = np.array(dic_jo[phase]['X']) -# dd_orig = np.array(dic_jo[phase]['Y']).reshape(-1, 1) -# kps_norm = np.divide((kps_orig - kps_mean), kps_std) -# -# # dd_norm = np.divide((dd_orig - dd_mean), dd_std) # ! No normalization on the output -# -# # Substitute the new values in the dictionary and save the mean and std -# dic_jo[phase]['X'] = kps_norm.tolist() -# dic_jo[phase]['mean']['X'] = kps_mean.tolist() -# dic_jo[phase]['std']['X'] = kps_std.tolist() -# -# dic_jo[phase]['Y'] = dd_orig.tolist() -# # dic_jo[phase]['mean']['Y'] = float(dd_mean) -# # dic_jo[phase]['std']['Y'] = float(dd_std) -# -# # Normalize all the clusters -# for clst in dic_jo[phase]['clst']: -# -# # Extract -# kps_orig = np.array(dic_jo[phase]['clst'][clst]['X']) -# dd_orig = np.array(dic_jo[phase]['clst'][clst]['Y']).reshape(-1, 1) -# # Normalize -# kps_norm = np.divide((kps_orig - kps_mean), kps_std) -# -# # dd_norm = np.divide((dd_orig - dd_mean), dd_std) #! 
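
# Illustrative standalone sketch (not part of the patched files): recovering the
# absolute Laplace scale b from the network output with the unnormalize_bi
# transform kept above, under the assumption that the second output column
# predicts log(b / mu), so that b = exp(log_ratio) * mu.
import torch

def unnormalize_bi_demo(outputs):
    outputs = outputs.clone()                     # work on a copy; the utility above is in-place
    outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
    return outputs

raw = torch.tensor([[10.0, -2.0]])                # mu = 10 m, log(b / mu) = -2
print(unnormalize_bi_demo(raw))                   # b = 10 * exp(-2) ~ 1.35 m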
No normalization on the output -# -# # Put back -# dic_jo[phase]['clst'][clst]['X'] = kps_norm.tolist() -# dic_jo[phase]['clst'][clst]['Y'] = dd_orig.tolist() -# -# return dic_jo -# -# -# def check_cluster_dim(dic_jo): -# """ Check that the sum of the clusters corresponds to all annotations""" -# -# for phase in ['train', 'val', 'test']: -# cnt_clst = 0 -# cnt_all = len(dic_jo[phase]['X']) -# for clst in dic_jo[phase]['clst']: -# cnt_clst += len(dic_jo[phase]['clst'][clst]['X']) -# assert cnt_all == cnt_clst - - - diff --git a/src/utils/pifpaf.py b/src/utils/pifpaf.py index de5cc56..43989bf 100644 --- a/src/utils/pifpaf.py +++ b/src/utils/pifpaf.py @@ -1,6 +1,7 @@ import numpy as np -from utils.camera import preprocess_single, get_keypoints, pixel_to_camera +import torch +from utils.camera import get_keypoints, pixel_to_camera def preprocess_pif(annotations, im_size=None): @@ -45,56 +46,75 @@ def preprocess_pif(annotations, im_size=None): return boxes, keypoints -def get_input_data(boxes, keypoints, kk, left_to_right=False): - inputs = [] - xy_centers = [] - uv_boxes = [] - uv_centers = [] - uv_shoulders = [] - uv_kps = [] - xy_kps = [] +def get_network_inputs(keypoints, kk): - if left_to_right: # Order boxes from left to right - ordered = np.argsort([xx[0] for xx in boxes]) + """ Preprocess batches of inputs + keypoints = torch tensors of (m, 3, 17) or list [3,17] + Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box + """ + if type(keypoints) == list: + keypoints = torch.tensor(keypoints) + if type(kk) == list: + kk = torch.tensor(kk) + # Projection in normalized image coordinates and zero-center with the center of the bounding box + uv_center = get_keypoints(keypoints, mode='center') + xy1_center = pixel_to_camera(uv_center, kk, 1) * 10 + xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 1) * 10 + kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3) + kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view + return kps_out - else: # Order boxes from most to less confident - confs = [] - for idx, box in enumerate(boxes): - confs.append(box[4]) - ordered = np.argsort(confs).tolist()[::-1] - for idx in ordered: - kps = keypoints[idx] - uv_kps.append(kps) - uv_boxes.append(boxes[idx]) +def preprocess_pif(annotations, im_size=None): + """ + Preprocess pif annotations: + 1. enlarge the box of 10% + 2. 
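
# Illustrative standalone sketch (not part of the patched files): the preprocessing
# idea behind get_network_inputs above: back-project pixel keypoints to normalized
# camera rays (z = 1), scale by 10 and zero-center them. Keypoints are assumed as a
# (m, 3, 17) tensor of [u, v, conf] rows and kk as a 3x3 list; the mean keypoint
# stands in here for the box centre computed by get_keypoints in the actual utility.
import torch

def network_inputs_demo(keypoints, kk):
    kk_inv = torch.inverse(torch.tensor(kk, dtype=torch.float32))
    uv1 = torch.cat((keypoints[:, 0:2, :], torch.ones_like(keypoints[:, 0:1, :])), dim=1)
    xy1_all = torch.matmul(kk_inv, uv1).permute(0, 2, 1) * 10       # (m, 17, 3) rays
    uv1_center = uv1.mean(dim=2, keepdim=True)                      # (m, 3, 1) centre pixel
    xy1_center = torch.matmul(kk_inv, uv1_center).permute(0, 2, 1) * 10
    kps_norm = xy1_all - xy1_center                                 # zero-centred rays
    return kps_norm[:, :, 0:2].reshape(kps_norm.size(0), -1)        # (m, 34) network input

kk = [[720., 0., 620.], [0., 720., 190.], [0., 0., 1.]]
keypoints = torch.rand(2, 3, 17) * 300.
print(network_inputs_demo(keypoints, kk).shape)                     # torch.Size([2, 34])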
Constraint it inside the image (if image_size provided) + """ - uu_c, vv_c = get_keypoints(kps[0], kps[1], "center") - uv_centers.append([round(uu_c), round(vv_c)]) - xy_center = pixel_to_camera(np.array([uu_c, vv_c, 1]), kk, 1) - xy_centers.append(xy_center) + boxes = [] + keypoints = [] - uu_1, vv_1 = get_keypoints(kps[0], kps[1], "shoulder") - uv_shoulders.append([round(uu_1), round(vv_1)]) + for dic in annotations: + box = dic['bbox'] + if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0) + return [], [] - # 2 steps of input normalization for each instance - kps_prep, kps_orig = preprocess_single(kps, kk) - inputs.append(kps_prep) - xy_kps.append(kps_orig) + else: + kps = prepare_pif_kps(dic['keypoints']) + conf = float(np.mean(np.array(kps[2]))) - return (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) + # Add 10% for y + delta_h = (box[3] - box[1]) / 10 + delta_w = (box[2] - box[0]) / 10 + assert delta_h > 0 and delta_w > 0, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + + # Put the box inside the image + if im_size is not None: + box[0] = max(0, box[0]) + box[1] = max(0, box[1]) + box[2] = min(box[2], im_size[0]) + box[3] = min(box[3], im_size[1]) + + box.append(conf) + boxes.append(box) + keypoints.append(kps) + + return boxes, keypoints def prepare_pif_kps(kps_in): """Convert from a list of 51 to a list of 3, 17""" - keypoints = np.array(kps_in).reshape(-1, 3).tolist() - xxs = [] - yys = [] - ccs = [] - - for kp_triple in keypoints: - xxs.append(kp_triple[0]) - yys.append(kp_triple[1]) - ccs.append(kp_triple[2]) + assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3" + xxs = kps_in[0:][::3] + yys = kps_in[1:][::3] # from offset 1 every 3 + ccs = kps_in[2:][::3] return [xxs, yys, ccs] + + diff --git a/src/visuals/printer.py b/src/visuals/printer.py index ef24bc9..6a92838 100644 --- a/src/visuals/printer.py +++ b/src/visuals/printer.py @@ -1,15 +1,18 @@ -import os import math import numpy as np +import torch +import cv2 + import matplotlib.pyplot as plt import matplotlib.cm as cm import matplotlib from mpl_toolkits.axes_grid1 import make_axes_locatable from matplotlib.patches import Ellipse, Circle -import cv2 + from collections import OrderedDict from utils.camera import pixel_to_camera +from utils.misc import get_task_error class Printer: @@ -158,13 +161,13 @@ class Printer: # Create bird or combine it with front) if any(xx in self.output_types for xx in ['bird', 'combined']): - uv_max = np.array([0, self.hh, 1]) + uv_max = [0., float(self.hh)] xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max) x_max = abs(xyz_max[0]) # shortcut to avoid oval circles in case of different kk for idx, _ in enumerate(self.xx_gt): if self.zz_gt[idx] > 0: - target = get_target_error(self.dds_real[idx]) + target = get_task_error(self.dds_real[idx]) angle = get_angle(self.xx_gt[idx], self.zz_gt[idx]) ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1, @@ -270,9 +273,3 @@ def get_angle(xx, zz): angle = theta * (180 / math.pi) return angle - - -def get_target_error(dd): - """Get target error not knowing the gender""" - mm_gender = 0.0556 - return mm_gender * dd
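
# Illustrative standalone sketch (not part of the patched files): the two
# preprocessing steps applied per pifpaf annotation above: enlarge the bounding box
# by 10% per side and clip it to the image, then regroup the flat 51-value keypoint
# list into [xs, ys, confidences] with stride-3 slicing as in prepare_pif_kps.
def enlarge_and_clip_demo(box, im_size=None):
    """box = [x1, y1, x2, y2]; im_size = (width, height)."""
    delta_w = (box[2] - box[0]) / 10
    delta_h = (box[3] - box[1]) / 10
    box = [box[0] - delta_w, box[1] - delta_h, box[2] + delta_w, box[3] + delta_h]
    if im_size is not None:                       # keep the enlarged box inside the image
        box = [max(0., box[0]), max(0., box[1]), min(box[2], im_size[0]), min(box[3], im_size[1])]
    return box

def prepare_pif_kps_demo(kps_in):
    assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
    return [kps_in[0::3], kps_in[1::3], kps_in[2::3]]   # xs, ys, confidences

print(enlarge_and_clip_demo([0., 10., 100., 110.], im_size=(1242, 374)))
print(prepare_pif_kps_demo([10., 20., 0.9, 30., 40., 0.8]))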