diff --git a/monoloco/__init__.py b/monoloco/__init__.py index e9cc208..fa8a434 100644 --- a/monoloco/__init__.py +++ b/monoloco/__init__.py @@ -1,4 +1,4 @@ """Open implementation of MonoLoco.""" -__version__ = '0.4.5' +__version__ = '0.4.6' diff --git a/monoloco/eval/eval_kitti.py b/monoloco/eval/eval_kitti.py index 64f803c..2368881 100644 --- a/monoloco/eval/eval_kitti.py +++ b/monoloco/eval/eval_kitti.py @@ -15,7 +15,7 @@ from tabulate import tabulate from ..utils import get_iou_matches, get_task_error, get_pixel_error, check_conditions, get_category, split_training, \ parse_ground_truth -from ..visuals import show_results, show_spread +from ..visuals import show_results, show_spread, show_task_error class EvalKitti: @@ -23,22 +23,22 @@ class EvalKitti: logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) CLUSTERS = ('easy', 'moderate', 'hard', 'all', '6', '10', '15', '20', '25', '30', '40', '50', '>50') - METHODS = ['m3d', 'md', 'geom', 'task_error', '3dop', 'our'] - HEADERS = ['method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all'] - CATEGORIES = ['pedestrian'] + ALP_THRESHOLDS = ('<0.5m', '<1m', '<2m') + METHODS_MONO = ['m3d', 'monodepth', '3dop', 'monoloco'] + METHODS_STEREO = ['ml_stereo', 'pose', 'reid'] + BASELINES = ['geometric', 'task_error', 'pixel_error'] + HEADERS = ('method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all') + CATEGORIES = ('pedestrian',) - def __init__(self, thresh_iou_our=0.3, thresh_iou_m3d=0.3, thresh_conf_m3d=0.3, thresh_conf_our=0.3, + def __init__(self, thresh_iou_monoloco=0.3, thresh_iou_base=0.3, thresh_conf_monoloco=0.3, thresh_conf_base=0.3, verbose=False, stereo=False): - self.dir_gt = os.path.join('data', 'kitti', 'gt') - self.dir_m3d = os.path.join('data', 'kitti', 'm3d') - self.dir_3dop = os.path.join('data', 'kitti', '3dop') - self.dir_md = os.path.join('data', 'kitti', 'monodepth') - self.dir_our = os.path.join('data', 'kitti', 'monoloco') + self.main_dir = 
os.path.join('data', 'kitti') + self.dir_gt = os.path.join(self.main_dir, 'gt') + self.methods = self.METHODS_MONO self.stereo = stereo if self.stereo: - self.dir_our_stereo = os.path.join('data', 'kitti', 'monoloco_stereo') - self.METHODS.extend(['our_stereo', 'pixel_error']) + self.methods.extend(self.METHODS_STEREO) path_train = os.path.join('splits', 'kitti_train.txt') path_val = os.path.join('splits', 'kitti_val.txt') dir_logs = os.path.join('data', 'logs') @@ -49,24 +49,21 @@ class EvalKitti: self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json') self.verbose = verbose - assert os.path.exists(self.dir_m3d) and os.path.exists(self.dir_our) \ - and os.path.exists(self.dir_3dop) - - self.dic_thresh_iou = {'m3d': thresh_iou_m3d, '3dop': thresh_iou_m3d, - 'md': thresh_iou_our, 'our': thresh_iou_our, 'our_stereo': thresh_iou_our} - self.dic_thresh_conf = {'m3d': thresh_conf_m3d, '3dop': thresh_conf_m3d, - 'our': thresh_conf_our, 'our_stereo': thresh_conf_our} + self.dic_thresh_iou = {method: (thresh_iou_monoloco if method[:8] == 'monoloco' else thresh_iou_base) + for method in self.methods} + self.dic_thresh_conf = {method: (thresh_conf_monoloco if method[:8] == 'monoloco' else thresh_conf_base) + for method in self.methods} # Extract validation images for evaluation names_gt = tuple(os.listdir(self.dir_gt)) _, self.set_val = split_training(names_gt, path_train, path_val) # Define variables to save statistics + self.dic_methods = None self.errors = None self.dic_stds = None self.dic_stats = None self.dic_cnt = None - self.cnt_stereo_error = None self.cnt_gt = 0 def run(self): @@ -80,40 +77,29 @@ class EvalKitti: self.dic_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))) self.dic_cnt = defaultdict(int) self.cnt_gt = 0 - self.cnt_stereo_error = 0 # Iterate over each ground truth file in the training set for name in self.set_val: path_gt = os.path.join(self.dir_gt, name) - path_m3d = os.path.join(self.dir_m3d, 
name) - path_our = os.path.join(self.dir_our, name) - if self.stereo: - path_our_stereo = os.path.join(self.dir_our_stereo, name) - path_3dop = os.path.join(self.dir_3dop, name) - path_md = os.path.join(self.dir_md, name) # Iterate over each line of the gt file and save box location and distances out_gt = parse_ground_truth(path_gt, category) + methods_out = defaultdict(tuple) # Save all methods for comparison self.cnt_gt += len(out_gt[0]) - # Extract annotations for the same file if out_gt[0]: - out_m3d = self._parse_txts(path_m3d, category, method='m3d') - out_3dop = self._parse_txts(path_3dop, category, method='3dop') - out_md = self._parse_txts(path_md, category, method='md') - out_our = self._parse_txts(path_our, category, method='our') - out_our_stereo = self._parse_txts(path_our_stereo, category, method='our') if self.stereo else [] + for method in self.methods: + # Extract annotations + dir_method = os.path.join(self.main_dir, method) + assert os.path.exists(dir_method), "directory of the method %s does not exists" % method + path_method = os.path.join(dir_method, name) + methods_out[method] = self._parse_txts(path_method, category, method=method) - # Compute the error with ground truth - self._estimate_error(out_gt, out_m3d, method='m3d') - self._estimate_error(out_gt, out_3dop, method='3dop') - self._estimate_error(out_gt, out_md, method='md') - self._estimate_error(out_gt, out_our, method='our') - if self.stereo: - self._estimate_error(out_gt, out_our_stereo, method='our_stereo') + # Compute the error with ground truth + self._estimate_error(out_gt, methods_out[method], method=method) # Iterate over all the files together to find a pool of common annotations - self._compare_error(out_gt, out_m3d, out_3dop, out_md, out_our, out_our_stereo) + self._compare_error(out_gt, methods_out) # Update statistics of errors and uncertainty for key in self.errors: @@ -129,79 +115,54 @@ class EvalKitti: if save or show: show_results(self.dic_stats, show, save) 
show_spread(self.dic_stats, show, save) + show_task_error(show, save) def _parse_txts(self, path, category, method): + boxes = [] dds = [] stds_ale = [] stds_epi = [] - dds_geom = [] + dds_geometric = [] + output = (boxes, dds) if method != 'monoloco' else (boxes, dds, stds_ale, stds_epi, dds_geometric) - # Iterate over each line of the txt file - if method in ['3dop', 'm3d']: - try: - with open(path, "r") as ff: - for line in ff: - if check_conditions(line, category, method=method, thresh=self.dic_thresh_conf[method]): - boxes.append([float(x) for x in line.split()[4:8]]) - loc = ([float(x) for x in line.split()[11:14]]) - dds.append(math.sqrt(loc[0] ** 2 + loc[1] ** 2 + loc[2] ** 2)) - self.dic_cnt[method] += 1 - return boxes, dds - - except FileNotFoundError: - return [], [] - - elif method == 'md': - try: - with open(path, "r") as ff: - for line in ff: - box = [float(x[:-1]) for x in line.split()[0:4]] - delta_h = (box[3] - box[1]) / 7 - delta_w = (box[2] - box[0]) / 3.5 - assert delta_h > 0 and delta_w > 0, "Bounding box <=0" - box[0] -= delta_w - box[1] -= delta_h - box[2] += delta_w - box[3] += delta_h + try: + with open(path, "r") as ff: + for line_str in ff: + line = line_str.split() + if check_conditions(line, category, method=method, thresh=self.dic_thresh_conf[method]): + if method == 'monodepth': + box = [float(x[:-1]) for x in line[0:4]] + delta_h = (box[3] - box[1]) / 7 + delta_w = (box[2] - box[0]) / 3.5 + assert delta_h > 0 and delta_w > 0, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + dd = float(line[5][:-1]) + else: + box = [float(x) for x in line[4:8]] + loc = ([float(x) for x in line[11:14]]) + dd = math.sqrt(loc[0] ** 2 + loc[1] ** 2 + loc[2] ** 2) boxes.append(box) - dds.append(float(line.split()[5][:-1])) + dds.append(dd) self.dic_cnt[method] += 1 - return boxes, dds - - except FileNotFoundError: - return [], [] - - else: - assert method == 'our', "method not recognized" - try: - with 
open(path, "r") as ff: - file_lines = ff.readlines() - for line_our in file_lines[:-1]: - line_list = [float(x) for x in line_our.split()] - - if check_conditions(line_list, category, method=method, thresh=self.dic_thresh_conf[method]): - boxes.append(line_list[:4]) - dds.append(line_list[8]) - stds_ale.append(line_list[9]) - stds_epi.append(line_list[10]) - dds_geom.append(line_list[11]) - self.dic_cnt[method] += 1 - self.dic_cnt['geom'] += 1 - - # kk_list = [float(x) for x in file_lines[-1].split()] - - return boxes, dds, stds_ale, stds_epi, dds_geom - - except FileNotFoundError: - return [], [], [], [], [] + if method == 'monoloco': + stds_ale.append(float(line[16])) + stds_epi.append(float(line[17])) + dds_geometric.append(float(line[18])) + self.dic_cnt['geometric'] += 1 + return output + except FileNotFoundError: + return output def _estimate_error(self, out_gt, out, method): """Estimate localization error""" boxes_gt, _, dds_gt, zzs_gt, truncs_gt, occs_gt = out_gt - if method[:3] == 'our': - boxes, dds, stds_ale, stds_epi, dds_geom = out + if method == 'monoloco': + boxes, dds, stds_ale, stds_epi, dds_geometric = out else: boxes, dds = out @@ -212,62 +173,52 @@ class EvalKitti: cat = get_category(boxes_gt[idx_gt], truncs_gt[idx_gt], occs_gt[idx_gt]) self.update_errors(dds[idx], dds_gt[idx_gt], cat, self.errors[method]) - if method == 'our': - self.update_errors(dds_geom[idx], dds_gt[idx_gt], cat, self.errors['geom']) + if method == 'monoloco': + self.update_errors(dds_geometric[idx], dds_gt[idx_gt], cat, self.errors['geometric']) self.update_uncertainty(stds_ale[idx], stds_epi[idx], dds[idx], dds_gt[idx_gt], cat) dd_task_error = dds_gt[idx_gt] + (get_task_error(dds_gt[idx_gt]))**2 self.update_errors(dd_task_error, dds_gt[idx_gt], cat, self.errors['task_error']) - - elif method == 'our_stereo': dd_pixel_error = get_pixel_error(dds_gt[idx_gt], zzs_gt[idx_gt]) self.update_errors(dd_pixel_error, dds_gt[idx_gt], cat, self.errors['pixel_error']) - def 
_compare_error(self, out_gt, out_m3d, out_3dop, out_md, out_our, out_our_stereo): + def _compare_error(self, out_gt, methods_out): """Compare the error for a pool of instances commonly matched by all methods""" - - # Extract outputs of each method boxes_gt, _, dds_gt, zzs_gt, truncs_gt, occs_gt = out_gt - boxes_m3d, dds_m3d = out_m3d - boxes_3dop, dds_3dop = out_3dop - boxes_md, dds_md = out_md - boxes_our, dds_our, _, _, dds_geom = out_our - if self.stereo: - boxes_our_stereo, dds_our_stereo, _, _, dds_geom_stereo = out_our_stereo # Find IoU matches - matches_our = get_iou_matches(boxes_our, boxes_gt, self.dic_thresh_iou['our']) - matches_m3d = get_iou_matches(boxes_m3d, boxes_gt, self.dic_thresh_iou['m3d']) - matches_3dop = get_iou_matches(boxes_3dop, boxes_gt, self.dic_thresh_iou['3dop']) - matches_md = get_iou_matches(boxes_md, boxes_gt, self.dic_thresh_iou['md']) + matches = [] + boxes_monoloco = methods_out['monoloco'][0] + matches_monoloco = get_iou_matches(boxes_monoloco, boxes_gt, self.dic_thresh_iou['monoloco']) + + base_methods = [method for method in self.methods if method != 'monoloco'] + for method in base_methods: + boxes = methods_out[method][0] + matches.append(get_iou_matches(boxes, boxes_gt, self.dic_thresh_iou[method])) # Update error of commonly matched instances - for (idx, idx_gt) in matches_our: - check, indices = extract_indices(idx_gt, matches_m3d, matches_3dop, matches_md) + for (idx, idx_gt) in matches_monoloco: + check, indices = extract_indices(idx_gt, *matches) if check: cat = get_category(boxes_gt[idx_gt], truncs_gt[idx_gt], occs_gt[idx_gt]) dd_gt = dds_gt[idx_gt] - self.update_errors(dds_our[idx], dd_gt, cat, self.errors['our_merged']) - self.update_errors(dds_geom[idx], dd_gt, cat, self.errors['geom_merged']) - self.update_errors(dd_gt + get_task_error(dd_gt), dd_gt, cat, self.errors['task_error_merged']) - self.update_errors(dds_m3d[indices[0]], dd_gt, cat, self.errors['m3d_merged']) - self.update_errors(dds_3dop[indices[1]], 
dd_gt, cat, self.errors['3dop_merged']) - self.update_errors(dds_md[indices[2]], dd_gt, cat, self.errors['md_merged']) - if self.stereo: - self.update_errors(dds_our_stereo[idx], dd_gt, cat, self.errors['our_stereo_merged']) - dd_pixel = get_pixel_error(dd_gt, zzs_gt[idx_gt]) - self.update_errors(dd_pixel, dd_gt, cat, self.errors['pixel_error_merged']) - error = abs(dds_our[idx] - dd_gt) - error_stereo = abs(dds_our_stereo[idx] - dd_gt) - if error_stereo > (error + 0.1): - self.cnt_stereo_error += 1 + for idx_indices, method in enumerate(base_methods): + dd = methods_out[method][1][indices[idx_indices]] + self.update_errors(dd, dd_gt, cat, self.errors[method + '_merged']) - for key in self.METHODS: + dd_monoloco = methods_out['monoloco'][1][idx] + dd_geometric = methods_out['monoloco'][4][idx] + self.update_errors(dd_monoloco, dd_gt, cat, self.errors['monoloco_merged']) + self.update_errors(dd_geometric, dd_gt, cat, self.errors['geometric_merged']) + self.update_errors(dd_gt + get_task_error(dd_gt), dd_gt, cat, self.errors['task_error_merged']) + dd_pixel = get_pixel_error(dd_gt, zzs_gt[idx_gt]) + self.update_errors(dd_pixel, dd_gt, cat, self.errors['pixel_error_merged']) + + for key in self.methods: self.dic_cnt[key + '_merged'] += 1 def update_errors(self, dd, dd_gt, cat, errors): """Compute and save errors between a single box and the gt box which match""" - diff = abs(dd - dd_gt) clst = find_cluster(dd_gt, self.CLUSTERS) errors['all'].append(diff) @@ -346,23 +297,13 @@ class EvalKitti: def show_statistics(self): + all_methods = self.methods + self.BASELINES print('-'*90) - alp = [[str(100 * average(self.errors[key][perc]))[:5] - for perc in ['<0.5m', '<1m', '<2m']] - for key in self.METHODS] - - ale = [[str(self.dic_stats['test'][key + '_merged'][clst]['mean'])[:4] + ' (' + - str(self.dic_stats['test'][key][clst]['mean'])[:4] + ')' - for clst in self.CLUSTERS[:4]] - for key in self.METHODS] - - results = [[key] + alp[idx] + ale[idx] for idx, key in 
enumerate(self.METHODS)] - print(tabulate(results, headers=self.HEADERS)) - print('-'*90 + '\n') + self.summary_table(all_methods) if self.verbose: - methods_all = list(chain.from_iterable((method, method + '_merged') for method in self.METHODS)) - for key in methods_all: + all_methods_merged = list(chain.from_iterable((method, method + '_merged') for method in all_methods)) + for key in all_methods_merged: for clst in self.CLUSTERS[:4]: print(" {} Average error in cluster {}: {:.2f} with a max error of {:.1f}, " "for {} annotations" @@ -370,13 +311,13 @@ class EvalKitti: self.dic_stats['test'][key][clst]['max'], self.dic_stats['test'][key][clst]['cnt'])) - if key == 'our': + if key == 'monoloco': print("% of annotation inside the confidence interval: {:.1f} %, " "of which {:.1f} % at higher risk" .format(self.dic_stats['test'][key][clst]['interval']*100, self.dic_stats['test'][key][clst]['at_risk']*100)) - for perc in ['<0.5m', '<1m', '<2m']: + for perc in self.ALP_THRESHOLDS: print("{} Instances with error {}: {:.2f} %" .format(key, perc, 100 * average(self.errors[key][perc]))) @@ -385,27 +326,35 @@ class EvalKitti: print("-" * 100) print("\n Annotations inside the confidence interval: {:.1f} %" - .format(self.dic_stats['test']['our']['all']['interval'])) - print("precision 1: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_1'])) - print("precision 2: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_2'])) - if self.stereo: - print("Stereo error greater than mono: {:.1f} %" - .format(100 * self.cnt_stereo_error / self.dic_cnt['our_merged'])) + .format(self.dic_stats['test']['monoloco']['all']['interval'])) + print("precision 1: {:.2f}".format(self.dic_stats['test']['monoloco']['all']['prec_1'])) + print("precision 2: {:.2f}".format(self.dic_stats['test']['monoloco']['all']['prec_2'])) + + def summary_table(self, all_methods): + """Tabulate table for ALP and ALE metrics""" + + alp = [[str(100 * average(self.errors[key][perc]))[:5] + for perc in 
['<0.5m', '<1m', '<2m']] + for key in all_methods] + + ale = [[str(self.dic_stats['test'][key + '_merged'][clst]['mean'])[:4] + ' (' + + str(self.dic_stats['test'][key][clst]['mean'])[:4] + ')' + for clst in self.CLUSTERS[:4]] + for key in all_methods] + + results = [[key] + alp[idx] + ale[idx] for idx, key in enumerate(all_methods)] + print(tabulate(results, headers=self.HEADERS)) + print('-' * 90 + '\n') def get_statistics(dic_stats, errors, dic_stds, key): """Update statistics of a cluster""" - try: - dic_stats['mean'] = average(errors) - dic_stats['max'] = max(errors) - dic_stats['cnt'] = len(errors) - except (ZeroDivisionError, ValueError): - dic_stats['mean'] = 0. - dic_stats['max'] = 0. - dic_stats['cnt'] = 0. + dic_stats['mean'] = average(errors) + dic_stats['max'] = max(errors) + dic_stats['cnt'] = len(errors) - if key == 'our': + if key == 'monoloco': dic_stats['std_ale'] = average(dic_stds['ale']) dic_stats['std_epi'] = average(dic_stds['epi']) dic_stats['interval'] = average(dic_stds['interval']) diff --git a/monoloco/eval/generate_kitti.py b/monoloco/eval/generate_kitti.py index c19c36e..4ebb39d 100644 --- a/monoloco/eval/generate_kitti.py +++ b/monoloco/eval/generate_kitti.py @@ -1,14 +1,12 @@ + """Run monoloco over all the pifpaf joints of KITTI images and extract and save the annotations in txt files""" -import math import os import glob -import json import shutil -import itertools -import copy +from collections import defaultdict import numpy as np import torch @@ -16,176 +14,146 @@ import torch from ..network import MonoLoco from ..network.process import preprocess_pifpaf from ..eval.geom_baseline import compute_distance -from ..utils import get_keypoints, pixel_to_camera, xyz_from_distance, get_calibration, depth_from_disparity +from ..utils import get_keypoints, pixel_to_camera, xyz_from_distance, get_calibration, open_annotations, split_training +from .stereo_baselines import baselines_association +from .reid_baseline import ReID, 
get_reid_features class GenerateKitti: - def __init__(self, model, dir_ann, p_dropout=0.2, n_dropout=0): + def __init__(self, model, dir_ann, p_dropout=0.2, n_dropout=0, stereo=True): # Load monoloco use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") self.monoloco = MonoLoco(model=model, device=device, n_dropout=n_dropout, p_dropout=p_dropout) - self.dir_out = os.path.join('data', 'kitti', 'monoloco') self.dir_ann = dir_ann - # List of images - self.list_basename = factory_basename(dir_ann) + # Extract list of pifpaf files in validation images + dir_gt = os.path.join('data', 'kitti', 'gt') + self.set_basename = factory_basename(dir_ann, dir_gt) self.dir_kk = os.path.join('data', 'kitti', 'calib') - def run_mono(self): + # Calculate stereo baselines + self.stereo = stereo + if stereo: + self.baselines = ['ml_stereo', 'pose', 'reid'] + self.cnt_disparity = defaultdict(int) + self.cnt_no_stereo = 0 + + # ReID Baseline + weights_path = 'data/models/reid_model_market.pkl' + self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128) + self.dir_images = os.path.join('data', 'kitti', 'images') + self.dir_images_r = os.path.join('data', 'kitti', 'images_r') + + def run(self): """Run Monoloco and save txt files for KITTI evaluation""" cnt_ann = cnt_file = cnt_no_file = 0 - dir_out = os.path.join('data', 'kitti', 'monoloco') - # Remove the output directory if alreaady exists (avoid residual txt files) - if os.path.exists(dir_out): - shutil.rmtree(dir_out) - os.makedirs(dir_out) + dir_out = {"monoloco": os.path.join('data', 'kitti', 'monoloco')} + make_new_directory(dir_out["monoloco"]) print("\nCreated empty output directory for txt files") + if self.stereo: + for key in self.baselines: + dir_out[key] = os.path.join('data', 'kitti', key) + make_new_directory(dir_out[key]) + print("Created empty output directory for {}".format(key)) + print("\n") + # Run monoloco over the list of images - for 
basename in self.list_basename: + for basename in self.set_basename: path_calib = os.path.join(self.dir_kk, basename + '.txt') annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename) boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374)) + assert keypoints, "all pifpaf files should have at least one annotation" + cnt_ann += len(boxes) + cnt_file += 1 - if not keypoints: - cnt_no_file += 1 - continue - else: - # Run the network and the geometric baseline - outputs, varss = self.monoloco.forward(keypoints, kk) - dds_geom = eval_geometric(keypoints, kk, average_y=0.48) + # Run the network and the geometric baseline + outputs, varss = self.monoloco.forward(keypoints, kk) + dds_geom = eval_geometric(keypoints, kk, average_y=0.48) # Save the file uv_centers = get_keypoints(keypoints, mode='bottom') # Kitti uses the bottom center to calculate depth xy_centers = pixel_to_camera(uv_centers, kk, 1) outputs = outputs.detach().cpu() zzs = xyz_from_distance(outputs[:, 0:1], xy_centers)[:, 2].tolist() + all_outputs = [outputs.detach().cpu(), varss.detach().cpu(), dds_geom, zzs] all_inputs = [boxes, xy_centers] all_params = [kk, tt] - path_txt = os.path.join(dir_out, basename + '.txt') - save_txts(path_txt, all_inputs, all_outputs, all_params) + path_txt = {'monoloco': os.path.join(dir_out['monoloco'], basename + '.txt')} + save_txts(path_txt['monoloco'], all_inputs, all_outputs, all_params) - # Update counting - cnt_ann += len(boxes) - cnt_file += 1 - print("Saved in {} txt {} annotations. 
Not found {} images\n".format(cnt_file, cnt_ann, cnt_no_file)) + # Correct using stereo disparity and save in different folder + if self.stereo: + zzs = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib) + for key in zzs: + path_txt[key] = os.path.join(dir_out[key], basename + '.txt') + save_txts(path_txt[key], all_inputs, zzs[key], all_params, mode='baseline') - def run_stereo(self): - """Run monoloco on left and right images and alculate disparity if a match is found""" + print("\nSaved in {} txt {} annotations. Not found {} images".format(cnt_file, cnt_ann, cnt_no_file)) - cnt_ann = cnt_file = cnt_no_file = cnt_no_stereo = cnt_disparity = 0 - dir_out = os.path.join('data', 'kitti', 'monoloco_stereo') + if self.stereo: + print("STEREO:") + for key in self.baselines: + print("Annotations corrected using {} baseline: {:.1f}%".format( + key, self.cnt_disparity[key] / cnt_ann * 100)) + print("Maximum possible stereo associations: {:.1f}%".format(self.cnt_disparity['max'] / cnt_ann * 100)) + print("Not found {}/{} stereo files".format(self.cnt_no_stereo, cnt_file)) - # Remove the output directory if alreaady exists (avoid residual txt files) - if os.path.exists(dir_out): - shutil.rmtree(dir_out) - os.makedirs(dir_out) - print("Created empty output directory for txt STEREO files") + def _run_stereo_baselines(self, basename, boxes, keypoints, zzs, path_calib): - for basename in self.list_basename: - path_calib = os.path.join(self.dir_kk, basename + '.txt') - stereo = True + annotations_r, _, _ = factory_file(path_calib, self.dir_ann, basename, mode='right') + boxes_r, keypoints_r = preprocess_pifpaf(annotations_r, im_size=(1242, 374)) - for mode in ['left', 'right']: - annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename, mode=mode) - boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374)) + # Stereo baselines + if keypoints_r: + path_image = os.path.join(self.dir_images, basename + '.png') + path_image_r = 
os.path.join(self.dir_images_r, basename + '.png') + reid_features = get_reid_features(self.reid_net, boxes, boxes_r, path_image, path_image_r) + zzs, cnt = baselines_association(self.baselines, zzs, keypoints, keypoints_r, reid_features) - if not keypoints and mode == 'left': - cnt_no_file += 1 - break + for key in cnt: + self.cnt_disparity[key] += cnt[key] - elif not keypoints and mode == 'right': - stereo = False - - else: - # Run the network and the geometric baseline - outputs, varss = self.monoloco.forward(keypoints, kk) - dds_geom = eval_geometric(keypoints, kk, average_y=0.48) - - uv_centers = get_keypoints(keypoints, mode='bottom') # Kitti uses the bottom to calculate depth - xy_centers = pixel_to_camera(uv_centers, kk, 1) - - if mode == 'left': - outputs_l = outputs.detach().cpu() - varss_l = varss.detach().cpu() - zzs_l = xyz_from_distance(outputs_l[:, 0:1], xy_centers)[:, 2].tolist() - kps_l = copy.deepcopy(keypoints) - boxes_l = boxes - xy_centers_l = xy_centers - dds_geom_l = dds_geom - kk_l = kk - tt_l = tt - - else: - kps_r = copy.deepcopy(keypoints) - - if stereo: - zzs, cnt = depth_from_disparity(zzs_l, kps_l, kps_r) - cnt_disparity += cnt - else: - zzs = zzs_l - - # Save the file - all_outputs = [outputs_l, varss_l, dds_geom_l, zzs] - all_inputs = [boxes_l, xy_centers_l] - all_params = [kk_l, tt_l] - path_txt = os.path.join(dir_out, basename + '.txt') - save_txts(path_txt, all_inputs, all_outputs, all_params) - - # Update counting - cnt_ann += len(boxes_l) - cnt_file += 1 - - # Print statistics - print("Saved in {} txt {} annotations. Not found {} images." 
- .format(cnt_file, cnt_ann, cnt_no_file)) - print("Annotations corrected using stereo: {:.1f}%, not found {} stereo files" - .format(cnt_disparity / cnt_ann * 100, cnt_no_stereo)) + else: + self.cnt_no_stereo += 1 + zzs = {key: zzs for key in self.baselines} + return zzs -def save_txts(path_txt, all_inputs, all_outputs, all_params): +def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco'): - outputs, varss, dds_geom, zzs = all_outputs[:] + assert mode in ('monoloco', 'baseline') + if mode == 'monoloco': + outputs, varss, dds_geom, zzs = all_outputs[:] + else: + zzs = all_outputs uv_boxes, xy_centers = all_inputs[:] kk, tt = all_params[:] with open(path_txt, "w+") as ff: - for idx in range(outputs.shape[0]): + for idx, zz_base in enumerate(zzs): xx = float(xy_centers[idx][0]) * zzs[idx] + tt[0] yy = float(xy_centers[idx][1]) * zzs[idx] + tt[1] - zz = zzs[idx] + tt[2] - dd = math.sqrt(xx ** 2 + yy ** 2 + zz ** 2) - cam_0 = [xx, yy, zz, dd] + zz = zz_base + tt[2] + cam_0 = [xx, yy, zz] + output_list = [0.]*3 + uv_boxes[idx][:-1] + [0.]*3 + cam_0 + [0.] 
+ uv_boxes[idx][-1:] # kitti format + ff.write("%s " % 'pedestrian') + for el in output_list: + ff.write("%f " % el) - for el in uv_boxes[idx][:]: - ff.write("%s " % el) - for el in cam_0: - ff.write("%s " % el) - ff.write("%s " % float(outputs[idx][1])) - ff.write("%s " % float(varss[idx])) - ff.write("%s " % dds_geom[idx]) + # add additional uncertainty information + if mode == 'monoloco': + ff.write("%f " % float(outputs[idx][1])) + ff.write("%f " % float(varss[idx])) + ff.write("%f " % dds_geom[idx]) ff.write("\n") - # Save intrinsic matrix in the last row - for kk_el in itertools.chain(*kk): # Flatten a list of lists - ff.write("%f " % kk_el) - ff.write("\n") - - -def factory_basename(dir_ann): - """ Return all the basenames in the annotations folder""" - - list_ann = glob.glob(os.path.join(dir_ann, '*.json')) - list_basename = [os.path.basename(x).split('.')[0] for x in list_ann] - assert list_basename, " Missing json annotations file to create txt files for KITTI datasets" - return list_basename - def factory_file(path_calib, dir_ann, basename, mode='left'): """Choose the annotation and the calibration files. 
Stereo option with ite = 1""" @@ -201,11 +169,7 @@ def factory_file(path_calib, dir_ann, basename, mode='left'): kk, tt = p_right[:] path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json') - try: - with open(path_ann, 'r') as f: - annotations = json.load(f) - except FileNotFoundError: - annotations = [] + annotations = open_annotations(path_ann) return annotations, kk, tt @@ -230,3 +194,28 @@ def eval_geometric(keypoints, kk, average_y=0.48): dds_geom.append(dd_geom) return dds_geom + + +def make_new_directory(dir_out): + """Remove the output directory if already exists (avoid residual txt files)""" + if os.path.exists(dir_out): + shutil.rmtree(dir_out) + os.makedirs(dir_out) + + +def factory_basename(dir_ann, dir_gt): + """ Return all the basenames in the annotations folder corresponding to validation images""" + + # Extract ground truth validation images + names_gt = tuple(os.listdir(dir_gt)) + path_train = os.path.join('splits', 'kitti_train.txt') + path_val = os.path.join('splits', 'kitti_val.txt') + _, set_val_gt = split_training(names_gt, path_train, path_val) + set_val_gt = {os.path.basename(x).split('.')[0] for x in set_val_gt} + + # Extract pifpaf files corresponding to validation images + list_ann = glob.glob(os.path.join(dir_ann, '*.json')) + set_basename = {os.path.basename(x).split('.')[0] for x in list_ann} + set_val = set_basename.intersection(set_val_gt) + assert set_val, " Missing json annotations file to create txt files for KITTI datasets" + return set_val diff --git a/monoloco/eval/reid_baseline.py b/monoloco/eval/reid_baseline.py new file mode 100644 index 0000000..8dd4aee --- /dev/null +++ b/monoloco/eval/reid_baseline.py @@ -0,0 +1,110 @@ + +import torch +import torch.backends.cudnn as cudnn +from torch import nn +import torch.nn.functional as F +import torchvision +import torchvision.transforms as T + + +from ..utils import open_image + + +def get_reid_features(reid_net, boxes, boxes_r, path_image, path_image_r): + + 
pil_image = open_image(path_image) + pil_image_r = open_image(path_image_r) + assert boxes and boxes_r + cropped_img = [] + for box in boxes: + cropped_img = cropped_img + [pil_image.crop((box[0], box[1], box[2], box[3]))] + cropped_img_r = [] + for box in boxes_r: + cropped_img_r = cropped_img_r + [pil_image_r.crop((box[0], box[1], box[2], box[3]))] + + features = reid_net.forward(cropped_img) + features_r = reid_net.forward(cropped_img_r) + return features.cpu(), features_r.cpu() + + +class ReID(object): + def __init__(self, weights_path, device, num_classes=751, height=256, width=128): + super(ReID, self).__init__() + torch.manual_seed(1) + self.device = device + + if self.device.type == "cuda": + cudnn.benchmark = True + torch.cuda.manual_seed_all(1) + else: + print("Currently using CPU (GPU is highly recommended)") + + self.transform_test = T.Compose([ + T.Resize((height, width)), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + print("ReID Baseline:") + print("Initializing ResNet model") + self.model = ResNet50(num_classes=num_classes, loss={'xent'}) + self.model.to(device) + num_param = sum(p.numel() for p in self.model.parameters()) / 1e+06 + print("Model size: {:.3f} M".format(num_param)) + + # load pretrained weights but ignore layers that don't match in size + checkpoint = torch.load(weights_path) + model_dict = self.model.state_dict() + pretrain_dict = {k: v for k, v in checkpoint.items() if k in model_dict and model_dict[k].size() == v.size()} + model_dict.update(pretrain_dict) + self.model.load_state_dict(model_dict) + print("Loaded pretrained weights from '{}'".format(weights_path)) + self.model.eval() + + def forward(self, images): + image = torch.stack([self.transform_test(image) for image in images], dim=0) + + image = image.to(self.device) + with torch.no_grad(): + features = self.model(image) + return features + + @staticmethod + def calculate_distmat(features_1, features_2=None, use_cosine=False): + 
query = features_1 + if features_2 is not None: + gallery = features_2 + else: + gallery = features_1 + m = query.size(0) + n = gallery.size(0) + if not use_cosine: + distmat = torch.pow(query, 2).sum(dim=1, keepdim=True).expand(m, n) + \ + torch.pow(gallery, 2).sum(dim=1, keepdim=True).expand(n, m).t() + distmat.addmm_(1, -2, query, gallery.t()) + else: + features_norm = query/query.norm(dim=1)[:, None] + reference_norm = gallery/gallery.norm(dim=1)[:, None] + distmat = torch.mm(features_norm, reference_norm.transpose(0, 1)) + return distmat + + +class ResNet50(nn.Module): + def __init__(self, num_classes, loss): + super(ResNet50, self).__init__() + self.loss = loss + resnet50 = torchvision.models.resnet50(pretrained=True) + self.base = nn.Sequential(*list(resnet50.children())[:-2]) + self.classifier = nn.Linear(2048, num_classes) + self.feat_dim = 2048 + + def forward(self, x): + x = self.base(x) + x = F.avg_pool2d(x, x.size()[2:]) + f = x.view(x.size(0), -1) + if not self.training: + return f + y = self.classifier(f) + + if self.loss == {'xent'}: + return y + return y, f diff --git a/monoloco/eval/stereo_baselines.py b/monoloco/eval/stereo_baselines.py new file mode 100644 index 0000000..3a27abd --- /dev/null +++ b/monoloco/eval/stereo_baselines.py @@ -0,0 +1,176 @@ + +""""Generate stereo baselines for kitti evaluation""" + +import warnings +from collections import defaultdict + +import numpy as np + +from ..utils import get_keypoints + + +def baselines_association(baselines, zzs, keypoints, keypoints_right, reid_features): + """compute stereo depth for each of the given stereo baselines""" + + # Initialize variables + zzs_stereo = defaultdict() + cnt_stereo = defaultdict(int) + + features, features_r, keypoints, keypoints_r = factory_features( + keypoints, keypoints_right, baselines, reid_features) + + # count maximum possible associations + cnt_stereo['max'] = min(keypoints.shape[0], keypoints_r.shape[0]) + + # Filter joints disparity and calculate avg 
def factory_features(keypoints, keypoints_right, baselines, reid_features):
    """Select the features each stereo baseline uses for the association.

    Args:
        keypoints: keypoints of the left image (list or array).
        keypoints_right: keypoints of the right image (list or array).
        baselines: iterable of baseline names; 'reid' uses ``reid_features``,
            every other name uses the keypoints themselves.
        reid_features: pair (left features, right features); only read when
            'reid' is among the baselines.

    Returns:
        (features, features_r, keypoints, keypoints_right) where the first two
        are dicts keyed by baseline name and the last two are numpy arrays.
    """
    # Convert once: the conversion does not depend on the baseline
    keypoints = np.array(keypoints)
    keypoints_right = np.array(keypoints_right)

    features = {}
    features_r = {}
    for key in baselines:
        if key == 'reid':
            features[key] = np.array(reid_features[0])
            features_r[key] = np.array(reid_features[1])
        else:
            features[key] = keypoints
            features_r[key] = keypoints_right

    return features, features_r, keypoints, keypoints_right
def similarity_to_depth(avg_disparity):
    """Convert an average disparity into a depth.

    The depth is ``0.54 * 721. / disparity`` (presumably stereo baseline in
    meters times focal length in pixels -- confirm against the dataset calibration).

    Returns:
        (zz_stereo, flag): flag is False and zz_stereo is nan when the disparity
        is zero, nan, infinite or cannot be converted to a float.
    """
    try:
        disparity = float(avg_disparity)
    except ValueError:
        return np.nan, False

    # float(nan) does not raise, so all-nan slices must be checked explicitly
    if not np.isfinite(disparity) or disparity == 0:
        return np.nan, False

    return 0.54 * 721. / disparity, True
def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y):
    """Verify disparities based on coefficient of variation, maximum y difference and z difference wrt monoloco

    Args:
        zz_stereo: depth obtained from the disparity.
        zz_mono: depth of the monocular estimate, used as reference.
        disparity_x: per-joint horizontal disparities (nan entries are ignored).
        disparity_y: per-joint vertical disparities (nan entries are ignored).

    Returns:
        True only if all three checks pass; any nan in the compared quantities
        makes the corresponding check fail.
    """
    COV_MIN = 0.1
    y_max_difference = (50 / zz_mono)
    z_max_difference = 0.6 * zz_mono

    cov = float(np.nanstd(disparity_x) / np.abs(np.nanmean(disparity_x)))  # Coefficient of variation
    avg_disparity_y = np.nanmedian(disparity_y)

    # The vertical offset is compared in magnitude: a large negative offset
    # is as much a mismatch as a large positive one.
    return bool(abs(zz_stereo - zz_mono) < z_max_difference and
                abs(avg_disparity_y) < y_max_difference and
                cov < COV_MIN)
a/monoloco/utils/__init__.py +++ b/monoloco/utils/__init__.py @@ -1,8 +1,7 @@ from .iou import get_iou_matches, reorder_matches, get_iou_matrix -from .misc import get_task_error, get_pixel_error, append_cluster +from .misc import get_task_error, get_pixel_error, append_cluster, open_annotations from .kitti import check_conditions, get_category, split_training, parse_ground_truth, get_calibration -from .camera import xyz_from_distance, get_keypoints, pixel_to_camera, project_3d +from .camera import xyz_from_distance, get_keypoints, pixel_to_camera, project_3d, open_image from .logs import set_logger -from .stereo import depth_from_disparity from ..utils.nuscenes import select_categories diff --git a/monoloco/utils/camera.py b/monoloco/utils/camera.py index 51200f0..3d41eda 100644 --- a/monoloco/utils/camera.py +++ b/monoloco/utils/camera.py @@ -2,15 +2,16 @@ import numpy as np import torch import torch.nn.functional as F +from PIL import Image def pixel_to_camera(uv_tensor, kk, z_met): """ Convert a tensor in pixel coordinate to absolute camera coordinates - It accepts lists or tensors of (m, 2) or (m, x, 2) or (m, 2, x) + It accepts lists or torch/numpy tensors of (m, 2) or (m, x, 2) where x is the number of keypoints """ - if isinstance(uv_tensor, list): + if isinstance(uv_tensor, (list, np.ndarray)): uv_tensor = torch.tensor(uv_tensor) if isinstance(kk, list): kk = torch.tensor(kk) @@ -67,14 +68,13 @@ def project_3d(box_obj, kk): def get_keypoints(keypoints, mode): """ Extract center, shoulder or hip points of a keypoint - Input --> list or torch.tensor [(m, 3, 17) or (3, 17)] + Input --> list or torch/numpy tensor [(m, 3, 17) or (3, 17)] Output --> torch.tensor [(m, 2)] """ - if isinstance(keypoints, list): + if isinstance(keypoints, (list, np.ndarray)): keypoints = torch.tensor(keypoints) if len(keypoints.size()) == 2: # add batch dim keypoints = keypoints.unsqueeze(0) - assert len(keypoints.size()) == 3 and keypoints.size()[1] == 3, "tensor dimensions not 
def open_image(path_image):
    """Read the file at path_image and return its content as an RGB PIL image."""
    with open(path_image, 'rb') as stream:
        return Image.open(stream).convert('RGB')
def open_annotations(path_ann):
    """Load the json annotations stored at path_ann.

    Returns:
        The decoded json content, or an empty list when the file does not exist.
    """
    try:
        # JSON is read as UTF-8 explicitly instead of the platform default encoding
        with open(path_ann, 'r', encoding='utf-8') as f:
            annotations = json.load(f)
    except FileNotFoundError:
        annotations = []
    return annotations
disparity_y_mask = np.where(mask_outlier, disparity_y_conf, np.nan) - avg_disparity_x = np.nanmedian(disparity_x_mask, axis=1) # ignore the nan - diffs_x = [abs(expected_disps[idx] - real) for real in avg_disparity_x] - idx_min = diffs_x.index(min(diffs_x)) - zz_stereo = 0.54 * 721. / float(avg_disparity_x[idx_min]) - - except ZeroDivisionError: - zz_stereo = - 100 - - return zz_stereo, disparity_x_mask[idx_min], disparity_y_mask[idx_min], idx_min - - -def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y): - - COV_MIN = 0.1 - y_max_difference = (50 / zz_mono) - z_max_difference = 0.6 * zz_mono - - cov = float(np.nanstd(disparity_x) / np.abs(np.nanmean(disparity_x))) # Coefficient of variation - avg_disparity_y = np.nanmedian(disparity_y) - - if abs(zz_stereo - zz_mono) < z_max_difference and \ - avg_disparity_y < y_max_difference and \ - cov < COV_MIN: - return True - return False - - -def interquartile_mask(distribution): - quartile_1, quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1) - iqr = quartile_3 - quartile_1 - lower_bound = quartile_1 - (iqr * 1.5) - upper_bound = quartile_3 + (iqr * 1.5) - return (distribution < upper_bound.reshape(-1, 1)) & (distribution > lower_bound.reshape(-1, 1)) diff --git a/monoloco/visuals/__init__.py b/monoloco/visuals/__init__.py index 531874b..2ddecc4 100644 --- a/monoloco/visuals/__init__.py +++ b/monoloco/visuals/__init__.py @@ -1,3 +1,3 @@ from .printer import Printer -from .figures import show_results, show_spread +from .figures import show_results, show_spread, show_task_error diff --git a/monoloco/visuals/figures.py b/monoloco/visuals/figures.py index 327ae8c..c762885 100644 --- a/monoloco/visuals/figures.py +++ b/monoloco/visuals/figures.py @@ -24,10 +24,8 @@ def show_results(dic_stats, show=False, save=False): x_max = 38 xx = np.linspace(0, 60, 100) excl_clusters = ['all', '50', '>50', 'easy', 'moderate', 'hard'] - clusters = tuple([clst for clst in dic_stats[phase]['our'] if clst not in 
excl_clusters]) - + clusters = tuple([clst for clst in dic_stats[phase]['monoloco'] if clst not in excl_clusters]) yy_gender = get_task_error(xx) - yy_gps = np.linspace(5., 5., xx.shape[0]) plt.figure(0) plt.grid(linewidth=0.2) @@ -41,21 +39,22 @@ def show_results(dic_stats, show=False, save=False): colors = ['r', 'deepskyblue', 'grey', 'b', 'darkorange'] lstyles = ['solid', 'solid', 'solid', 'solid', 'dashdot'] - plt.plot(xx, yy_gps, '-', label="GPS Error", color='y') - for idx, method in enumerate(['m3d_merged', 'geom_merged', 'md_merged', 'our_merged', '3dop_merged']): + for idx, method in enumerate(['m3d_merged', 'geometric_merged', 'monodepth_merged', 'monoloco_merged', + '3dop_merged']): errs = [dic_stats[phase][method][clst]['mean'] for clst in clusters] + assert errs, "method %s empty" % method xxs = get_distances(clusters) plt.plot(xxs, errs, marker=mks[idx], markersize=mksizes[idx], linewidth=lws[idx], label=labels[idx], linestyle=lstyles[idx], color=colors[idx]) plt.plot(xx, yy_gender, '--', label="Task error", color='lightgreen', linewidth=2.5) plt.legend(loc='upper left') - if show: - plt.show() if save: path_fig = os.path.join(dir_out, 'results.png') plt.savefig(path_fig) print("Figure of results saved in {}".format(path_fig)) + if show: + plt.show() plt.close() @@ -103,12 +102,12 @@ def show_spread(dic_stats, show=False, save=False): fig.subplots_adjust(hspace=0.1) plt.setp([aa.get_yticklabels() for aa in fig.axes[:-1]], visible=False) plt.legend() - if show: - plt.show() if save: path_fig = os.path.join(dir_out, 'spread.png') plt.savefig(path_fig) print("Figure of confidence intervals saved in {}".format(path_fig)) + if show: + plt.show() plt.close() @@ -129,9 +128,7 @@ def show_task_error(show, save): yy_young_male = target_error(xx, mm_young_male) yy_young_female = target_error(xx, mm_young_female) yy_gender = target_error(xx, mm_gmm) - yy_gps = np.linspace(5., 5., xx.shape[0]) plt.grid(linewidth=0.3) - plt.plot(xx, yy_gps, color='y', label='GPS') 
plt.plot(xx, yy_young_male, linestyle='dotted', linewidth=2.1, color='b', label='Adult/young male') plt.plot(xx, yy_young_female, linestyle='dotted', linewidth=2.1, color='darkorange', label='Adult/young female') plt.plot(xx, yy_gender, '--', color='lightgreen', linewidth=2.8, label='Generic adult (task error)') @@ -141,12 +138,12 @@ def show_task_error(show, save): plt.xlabel("Ground-truth distance from the camera $d_{gt}$ [m]") plt.ylabel("Localization error $\hat{e}$ due to human height variation [m]") # pylint: disable=W1401 plt.legend(loc=(0.01, 0.55)) # Location from 0 to 1 from lower left - if show: - plt.show() if save: path_fig = os.path.join(dir_out, 'task_error.png') plt.savefig(path_fig) print("Figure of task error saved in {}".format(path_fig)) + if show: + plt.show() plt.close() diff --git a/tests/test_package.py b/tests/test_package.py index 9f3e427..6f083f2 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -53,7 +53,7 @@ def test_package(): # Training test val_acc, model = tst_trainer(JOINTS) - assert val_acc < 2 + assert val_acc < 2.5 # Prediction test dic_out, kk = tst_prediction(model, PIFPAF_KEYPOINTS)