diff --git a/.gitignore b/.gitignore index 6414f31..0eeba76 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ data/ .DS_store __pycache__ +Monoloco/*.pyc +.pytest* \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..4685a31 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,26 @@ + + +[BASIC] +variable-rgx=[a-z0-9_]{1,30}$ # to accept 2 (different) letter variables + + +Good-names=xx,dd,zz,hh,ww,pp,kk,lr,w1,w2,w3,mm,im,uv,ax,COV_MIN,CONF_MIN + + +[TYPECHECK] + +disable=E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation + + +# List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. + +generated-members=numpy.*,torch.*,cv2.* + +ignored-modules=nuscenes, tabulate, cv2 + + + +[FORMAT] +max-line-length=120 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..45a5143 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,13 @@ +dist: xenial +language: python +python: + - "3.6" + - "3.7" +install: + - pip install openpifpaf + - pip install nuscenes-devkit + - pip install tabulate + - pip install pylint +script: + - pylint monoloco --disable=unused-variable,fixme + - pytest -vv diff --git a/README.md b/README.md index 1a793f7..6bf9cb3 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ All details for Pifpaf pose detector at [openpifpaf](https://github.com/vita-epf ``` -pip install nuscenes-devkit openpifpaf +pip install openpifpaf nuscenes-devkit tabulate ``` ### Data structure @@ -63,14 +63,14 @@ Alternatively, you can download a Pifpaf pre-trained model from [openpifpaf](htt # Interfaces All the commands are run through a main file called `main.py` using subparsers. -To check all the commands for the parser and the subparsers run: - -* `python3 src/main.py --help` -* `python3 src/main.py prep --help` -* `python3 src/main.py predict --help` -* `python3 src/main.py train --help` -* `python3 src/main.py eval --help` +To check all the commands for the parser and the subparsers (including openpifpaf ones) run: +* `python3 -m monoloco.run --help` +* `python3 -m monoloco.run predict --help` +* `python3 -m monoloco.run train --help` +* `python3 -m monoloco.run eval --help` +* `python3 -m monoloco.run prep --help` +or check the file `monoloco/run.py` # Predict The predict script receives an image (or an entire folder using glob expressions), @@ -96,7 +96,7 @@ If it does not find the file, it will generate images with all the predictions without ground-truth matching. Below is an example with and without ground-truth matching. They have been created (adding or removing `--path_gt`) with: -`python3 src/main.py predict --networks monoloco --glob docs/002282.png --output_types combined --scale 2 +`python3 -m monoloco.run predict --networks monoloco --glob docs/002282.png --output_types combined --scale 2 --model data/models/monoloco-190513-1437.pkl --n_dropout 100 --z_max 30` With ground truth matching (only matching people): @@ -110,7 +110,7 @@ To accurately estimate distance, the focal length is necessary. However, it is still possible to test Monoloco on images where the calibration matrix is not available. Absolute distances are not meaningful but relative distances still are.
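To see why relative distances survive a missing calibration, here is a minimal pinhole-model sketch (toy numbers, not taken from the repository): an assumed focal length rescales every estimated depth by the same factor, so ratios between people are preserved.

```python
# Pinhole model: z = f * H / h, with real height H and pixel height h.
# A guessed focal length f_guess rescales every depth by the same constant,
# so absolute depths are off but relative distances are preserved.
f_guess = 1000.0              # hypothetical focal length in pixels
avg_height = 1.7              # assumed average pedestrian height in meters
h1, h2 = 120.0, 60.0          # detected pixel heights of two people
z1 = f_guess * avg_height / h1
z2 = f_guess * avg_height / h2
print(z2 / z1)                # 2.0, independent of the chosen focal length
```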
Below is an example on a generic image from the web, created with: -`python3 src/main.py predict --networks monoloco --glob docs/surf.jpg --output_types combined --model data/models/monoloco-190513-1437.pkl --n_dropout 100 --z_max 25` +`python3 -m monoloco.run predict --networks monoloco --glob docs/surf.jpg --output_types combined --model data/models/monoloco-190513-1437.pkl --n_dropout 100 --z_max 25` ![no calibration](docs/surf.jpg.combined.png) @@ -124,7 +124,7 @@ Multiple visualizations can be combined in different windows. The above gif has been obtained by running the following command on a MacBook: -`python src/main.py predict --webcam --scale 0.2 --output_types combined --z_max 10 --checkpoint resnet50` +`python3 -m monoloco.run predict --webcam --scale 0.2 --output_types combined --z_max 10 --checkpoint resnet50` # Preprocess @@ -148,7 +148,7 @@ You can create them by running the predict script and using `--networks pifpaf`. ### Input joints for training MonoLoco is trained using 2D human pose joints matched with the ground truth location provided by -nuScenes or KITTI Dataset. To create the joints run: `python src/main.py prep` specifying: +nuScenes or KITTI Dataset. To create the joints run: `python3 -m monoloco.run prep` specifying: 1. `--dir_ann` annotation directory containing Pifpaf joints of KITTI or nuScenes. 2. `--dataset` Which dataset to preprocess. For nuscenes, all three versions of the @@ -163,12 +163,12 @@ by the image name to easily access ground truth files for evaluation and predict # Train Provide the json file containing the preprocessed joints as argument. -As simple as `python3 src/main.py --train --joints ` +As simple as `python3 -m monoloco.run train --joints ` -All the hyperparameters options can be checked at `python3 src/main.py train --help`. +All the hyperparameter options can be checked with `python3 -m monoloco.run train --help`. ### Hyperparameters tuning -Random search in log space is provided. An example: `python3 src/main.py train --hyp --multiplier 10 --r_seed 1`. +Random search in log space is provided. An example: `python3 -m monoloco.run train --hyp --multiplier 10 --r_seed 1`. One iteration of the multiplier includes 6 runs. @@ -176,7 +176,7 @@ One iteration of the multiplier includes 6 runs. Evaluate the performance of the trained model on the KITTI or nuScenes dataset. ### 1) nuScenes Evaluation on nuScenes is already provided during training. It is also possible to evaluate an existing model by running -`python src/main.py eval --dataset nuscenes --model ` +`python3 -m monoloco.run eval --dataset nuscenes --model ` ### 2) KITTI ### Baselines @@ -186,7 +186,7 @@ and stereo Baselines: [Mono3D](https://www.cs.toronto.edu/~urtasun/publications/chen_etal_cvpr16.pdf), [3DOP](https://xiaozhichen.github.io/papers/nips15chen.pdf), [MonoDepth](https://arxiv.org/abs/1609.03677) and our -[Geometrical Baseline](src/eval/geom_baseline.py). +[Geometrical Baseline](monoloco/eval/geom_baseline.py). * **Mono3D**: download validation files from [here](http://3dimage.ee.tsinghua.edu.cn/cxz/mono3d) and save them into `data/kitti/m3d` @@ -196,7 +196,7 @@ and save them into `data/kitti/3dop` [here](https://github.com/Parrotlife/pedestrianDepth-baseline/tree/master/MonoDepth-PyTorch) and save them into `data/kitti/monodepth` * **GeometricalBaseline**: A geometrical baseline comparison is provided.
-The best average value for comparison can be created running `python src/main.py eval --geometric` +The best average value for comparison can be created by running `python3 -m monoloco.run eval --geometric` #### Evaluation First the model preprocesses the joints starting from json annotations predicted by pifpaf, @@ -205,7 +205,7 @@ in txt files with a format comparable to the other baselines. Then the model performs evaluation. The following graph is obtained by running: -`python3 src/main.py eval --dataset kitti --generate --model data/models/monoloco-190513-1437.pkl +`python3 -m monoloco.run eval --dataset kitti --generate --model data/models/monoloco-190513-1437.pkl --dir_ann ` ![kitti_evaluation](docs/results.png) diff --git a/monoloco/__init__.py b/monoloco/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/monoloco/eval/__init__.py b/monoloco/eval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/eval/kitti_eval.py b/monoloco/eval/eval_kitti.py similarity index 54% rename from src/eval/kitti_eval.py rename to monoloco/eval/eval_kitti.py index aee04e1..65a13ab 100644 --- a/src/eval/kitti_eval.py +++ b/monoloco/eval/eval_kitti.py @@ -1,38 +1,45 @@ -"""Evaluate Monoloco code on KITTI dataset using ALE and ALP metrics""" - -import os -import math -import logging -from collections import defaultdict -import datetime - -from utils.iou import get_iou_matches -from utils.misc import get_task_error -from utils.kitti import check_conditions, get_category, split_training, parse_ground_truth -from visuals.results import print_results - - -class KittiEval: - """ - Evaluate Monoloco code and compare it with the following baselines: +"""Evaluate Monoloco code on KITTI dataset using ALE and ALP metrics with the following baselines: - Mono3D - 3DOP - MonoDepth """ + +import os +import math +import logging +import datetime +from collections import defaultdict +from itertools import chain + +from tabulate import tabulate + +from ..utils.iou import get_iou_matches +from ..utils.misc import get_task_error, get_pixel_error +from ..utils.kitti import check_conditions, get_category, split_training, parse_ground_truth +from ..visuals.results import print_results + + +class EvalKitti: + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) CLUSTERS = ('easy', 'moderate', 'hard', 'all', '6', '10', '15', '20', '25', '30', '40', '50', '>50') - dic_stds = defaultdict(lambda: defaultdict(list)) - dic_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))) - dic_cnt = defaultdict(int) - errors = defaultdict(lambda: defaultdict(list)) + METHODS = ['m3d', 'geom', 'task_error', '3dop', 'our'] + HEADERS = ['method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all'] + CATEGORIES = ['pedestrian', 'cyclist'] + + def __init__(self, thresh_iou_our=0.3, thresh_iou_m3d=0.3, thresh_conf_m3d=0.3, thresh_conf_our=0.3, + verbose=False, stereo=False): - def __init__(self, thresh_iou_our=0.3, thresh_iou_m3d=0.5, thresh_conf_m3d=0.5, thresh_conf_our=0.3): self.dir_gt = os.path.join('data', 'kitti', 'gt') self.dir_m3d = os.path.join('data', 'kitti', 'm3d') self.dir_3dop = os.path.join('data', 'kitti', '3dop') self.dir_md = os.path.join('data', 'kitti', 'monodepth') self.dir_our = os.path.join('data', 'kitti', 'monoloco') + self.stereo = stereo + if self.stereo: + self.dir_our_stereo = os.path.join('data', 'kitti', 'monoloco_stereo') + self.METHODS.extend(['our_stereo', 'pixel_error']) path_train = os.path.join('splits', 'kitti_train.txt') path_val = os.path.join('splits', 'kitti_val.txt') dir_logs = os.path.join('data', 'logs') @@ -41,106 +48,101 @@ class KittiEval: now = datetime.datetime.now() now_time = now.strftime("%Y%m%d-%H%M")[2:] self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json') + self.verbose = verbose assert os.path.exists(self.dir_m3d) and os.path.exists(self.dir_our) \ and os.path.exists(self.dir_3dop) self.dic_thresh_iou = {'m3d': thresh_iou_m3d, '3dop': thresh_iou_m3d, - 'md': thresh_iou_our, 'our': thresh_iou_our} - self.dic_thresh_conf = {'m3d': thresh_conf_m3d, '3dop': thresh_conf_m3d, 'our': thresh_conf_our} + 'md': thresh_iou_our, 'our': thresh_iou_our, 'our_stereo': thresh_iou_our} + self.dic_thresh_conf = {'m3d': thresh_conf_m3d, '3dop': thresh_conf_m3d, + 'our': thresh_conf_our, 'our_stereo': thresh_conf_our} # Extract validation images for evaluation names_gt = tuple(os.listdir(self.dir_gt)) _, self.set_val = split_training(names_gt, path_train, path_val) + # Define variables to save statistics + self.errors = None + self.dic_stds = None + self.dic_stats = None + self.dic_cnt = None + self.cnt_stereo_error = None + self.cnt_gt = 0 + def run(self): """Evaluate Monoloco performance on ALP and ALE metrics""" - # Iterate over each ground truth file in the training set - cnt_gt = 0 - for name in self.set_val: - path_gt = os.path.join(self.dir_gt, name) - path_m3d = os.path.join(self.dir_m3d, name) - path_our = os.path.join(self.dir_our, name) - path_3dop = os.path.join(self.dir_3dop, name) - path_md = os.path.join(self.dir_md, name) + for category in self.CATEGORIES: - # Iterate over each line of the gt file and save box location and distances - out_gt = parse_ground_truth(path_gt) - cnt_gt += len(out_gt[0]) + # Initialize variables + self.errors = defaultdict(lambda: defaultdict(list)) + self.dic_stds = defaultdict(lambda: defaultdict(list)) + self.dic_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))) + self.dic_cnt = defaultdict(int) + self.cnt_gt = 0 + self.cnt_stereo_error = 0 - # Extract annotations for the same file - if out_gt[0]: - out_m3d = self._parse_txts(path_m3d, method='m3d') - out_3dop = self._parse_txts(path_3dop, method='3dop') - out_md = self._parse_txts(path_md, method='md') - out_our = self._parse_txts(path_our, method='our') + # Iterate over each ground truth file in the validation set + for name in self.set_val: + path_gt = os.path.join(self.dir_gt, name) + path_m3d = os.path.join(self.dir_m3d, name) + path_our = os.path.join(self.dir_our, name) + if self.stereo: + path_our_stereo = os.path.join(self.dir_our_stereo, name) + path_3dop = os.path.join(self.dir_3dop, name) + path_md = os.path.join(self.dir_md, name) - # Compute the error with ground truth - self._estimate_error(out_gt, out_m3d, method='m3d') - self._estimate_error(out_gt, out_3dop, method='3dop') - self._estimate_error(out_gt, out_md, method='md') - self._estimate_error(out_gt, out_our, method='our') + # Iterate over each line of the gt file and save box location and distances + out_gt = parse_ground_truth(path_gt, category) + self.cnt_gt += len(out_gt[0]) - # Iterate over all the files together to find a pool of common annotations - self._compare_error(out_gt, out_m3d, out_3dop, out_md, out_our) + # Extract annotations for the same file + if out_gt[0]: + out_m3d = self._parse_txts(path_m3d, category, method='m3d') + out_3dop = self._parse_txts(path_3dop, category, method='3dop') + # out_md = self._parse_txts(path_md, category, method='md') + out_md = out_m3d + out_our = self._parse_txts(path_our, category, method='our') + out_our_stereo = self._parse_txts(path_our_stereo, category, method='our') if self.stereo else [] - # Update statistics of errors and uncertainty - for key in self.errors: - add_true_negatives(self.errors[key], cnt_gt) - for clst in self.CLUSTERS[:-2]: # M3d and pifpaf does not have annotations above 40 meters - get_statistics(self.dic_stats['test'][key][clst], self.errors[key][clst], self.dic_stds[clst], key) + # Compute the error with ground truth + self._estimate_error(out_gt, out_m3d, method='m3d') + self._estimate_error(out_gt, out_3dop, method='3dop') + # self._estimate_error(out_gt, out_md, method='md') + self._estimate_error(out_gt, out_our, method='our') + if self.stereo: + self._estimate_error(out_gt, out_our_stereo, method='our_stereo') - # Show statistics - print(" Number of GT annotations: {} ".format(cnt_gt)) - for key in self.errors: - if key in ['our', 'm3d', '3dop']: - print(" Number of {} annotations with confidence >= {} : {} " .format(key, self.dic_thresh_conf[key], self.dic_cnt[key])) + # Iterate over all the files together to find a pool of common annotations + self._compare_error(out_gt, out_m3d, out_3dop, out_md, out_our, out_our_stereo) - for clst in self.CLUSTERS[:-9]: - print(" {} Average error in cluster {}: {:.2f} with a max error of {:.1f}, " - "for {} annotations" - .format(key, clst, self.dic_stats['test'][key][clst]['mean'], - self.dic_stats['test'][key][clst]['max'], - self.dic_stats['test'][key][clst]['cnt'])) + # Update statistics of errors and uncertainty + for key in self.errors: + add_true_negatives(self.errors[key], self.cnt_gt) + for clst in self.CLUSTERS[:-2]: # M3d and pifpaf do not have annotations above 40 meters + get_statistics(self.dic_stats['test'][key][clst], self.errors[key][clst], self.dic_stds[clst], key) - if key == 'our': - print("% of annotation inside the confidence interval: {:.1f} %, " - "of which {:.1f} % at higher risk" - .format(100 * self.dic_stats['test'][key][clst]['interval'], - 100 * self.dic_stats['test'][key][clst]['at_risk'])) - - for perc in ['<0.5m', '<1m', '<2m']: - print("{} Instances with error {}: {:.2f} %" - .format(key, perc, 100 * sum(self.errors[key][perc])/len(self.errors[key][perc]))) - - print("\n Number of matched annotations: {:.1f} %".format(self.errors[key]['matched'])) - print("-"*100) - - print("\n Annotations inside the confidence interval: {:.1f} %" - .format(100 * self.dic_stats['test']['our']['all']['interval'])) - print("precision 1: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_1'])) - print("precision 2: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_2'])) + # Show statistics + print('\n' + category.upper() + ':') + self.show_statistics() def printer(self, show): print_results(self.dic_stats, show) - def _parse_txts(self, path, method): + def _parse_txts(self, path, category, method): boxes = [] dds = [] stds_ale = [] stds_epi = [] dds_geom = [] - # xyzs = [] - # xy_kps = [] # Iterate over each line of the txt file if method in ['3dop', 'm3d']: try: with open(path, "r") as ff: for line in ff: - if check_conditions(line, thresh=self.dic_thresh_conf[method], mode=method): + if check_conditions(line, category, method=method, thresh=self.dic_thresh_conf[method]): boxes.append([float(x) for x in line.split()[4:8]]) loc = ([float(x) for x in line.split()[11:14]]) dds.append(math.sqrt(loc[0] ** 2 + loc[1] ** 2 + loc[2] ** 2)) @@ -155,7 +157,7 @@ class KittiEval: with open(path, "r") as ff: for line in ff: box =
[float(x[:-1]) for x in line.split()[0:4]] - delta_h = (box[3] - box[1]) / 10 + delta_h = (box[3] - box[1]) / 10 # TODO Add new value delta_w = (box[2] - box[0]) / 10 assert delta_h > 0 and delta_w > 0, "Bounding box <=0" box[0] -= delta_w @@ -178,13 +180,14 @@ class KittiEval: for line_our in file_lines[:-1]: line_list = [float(x) for x in line_our.split()] - if check_conditions(line_list, thresh=self.dic_thresh_conf[method], mode=method): + if check_conditions(line_list, category, method=method, thresh=self.dic_thresh_conf[method]): boxes.append(line_list[:4]) dds.append(line_list[8]) stds_ale.append(line_list[9]) stds_epi.append(line_list[10]) dds_geom.append(line_list[11]) self.dic_cnt[method] += 1 + self.dic_cnt['geom'] += 1 # kk_list = [float(x) for x in file_lines[-1].split()] @@ -196,8 +199,8 @@ class KittiEval: def _estimate_error(self, out_gt, out, method): """Estimate localization error""" - boxes_gt, _, dds_gt, truncs_gt, occs_gt = out_gt - if method == 'our': + boxes_gt, _, dds_gt, zzs_gt, truncs_gt, occs_gt = out_gt + if method[:3] == 'our': boxes, dds, stds_ale, stds_epi, dds_geom = out else: boxes, dds = out @@ -208,19 +211,28 @@ class KittiEval: # Update error if match is found cat = get_category(boxes_gt[idx_gt], truncs_gt[idx_gt], occs_gt[idx_gt]) self.update_errors(dds[idx], dds_gt[idx_gt], cat, self.errors[method]) + if method == 'our': self.update_errors(dds_geom[idx], dds_gt[idx_gt], cat, self.errors['geom']) self.update_uncertainty(stds_ale[idx], stds_epi[idx], dds[idx], dds_gt[idx_gt], cat) + dd_task_error = dds_gt[idx_gt] + (get_task_error(dds_gt[idx_gt], mode='mad'))**2 + self.update_errors(dd_task_error, dds_gt[idx_gt], cat, self.errors['task_error']) - def _compare_error(self, out_gt, out_m3d, out_3dop, out_md, out_our): + elif method == 'our_stereo': + dd_pixel_error = get_pixel_error(dds_gt[idx_gt], zzs_gt[idx_gt]) + self.update_errors(dd_pixel_error, dds_gt[idx_gt], cat, self.errors['pixel_error']) + + def _compare_error(self, out_gt, out_m3d, out_3dop, out_md, out_our, out_our_stereo): """Compare the error for a pool of instances commonly matched by all methods""" # Extract outputs of each method - boxes_gt, _, dds_gt, truncs_gt, occs_gt = out_gt + boxes_gt, _, dds_gt, zzs_gt, truncs_gt, occs_gt = out_gt boxes_m3d, dds_m3d = out_m3d boxes_3dop, dds_3dop = out_3dop boxes_md, dds_md = out_md boxes_our, dds_our, _, _, dds_geom = out_our + if self.stereo: + boxes_our_stereo, dds_our_stereo, _, _, dds_geom_stereo = out_our_stereo # Find IoU matches matches_our = get_iou_matches(boxes_our, boxes_gt, self.dic_thresh_iou['our']) @@ -234,12 +246,25 @@ class KittiEval: if check: cat = get_category(boxes_gt[idx_gt], truncs_gt[idx_gt], occs_gt[idx_gt]) dd_gt = dds_gt[idx_gt] + self.update_errors(dds_our[idx], dd_gt, cat, self.errors['our_merged']) self.update_errors(dds_geom[idx], dd_gt, cat, self.errors['geom_merged']) + self.update_errors(dd_gt + get_task_error(dd_gt, mode='mad'), + dd_gt, cat, self.errors['task_error_merged']) self.update_errors(dds_m3d[indices[0]], dd_gt, cat, self.errors['m3d_merged']) self.update_errors(dds_3dop[indices[1]], dd_gt, cat, self.errors['3dop_merged']) self.update_errors(dds_md[indices[2]], dd_gt, cat, self.errors['md_merged']) - self.dic_cnt['merged'] += 1 + if self.stereo: + self.update_errors(dds_our_stereo[idx], dd_gt, cat, self.errors['our_stereo_merged']) + dd_pixel = get_pixel_error(dd_gt, zzs_gt[idx_gt]) + self.update_errors(dd_pixel, dd_gt, cat, self.errors['pixel_error_merged']) + error = abs(dds_our[idx] - dd_gt) + 
error_stereo = abs(dds_our_stereo[idx] - dd_gt) + if error_stereo > (error + 0.1): + self.cnt_stereo_error += 1 + + for key in self.METHODS: + self.dic_cnt[key + '_merged'] += 1 def update_errors(self, dd, dd_gt, cat, errors): """Compute and save errors between a single box and the gt box which matches""" @@ -320,21 +345,74 @@ class KittiEval: self.dic_stds[clst]['prec_2'].append(prec_2) self.dic_stds[cat]['prec_2'].append(prec_2) + def show_statistics(self): + + print('-'*90) + alp = [[str(100 * average(self.errors[key][perc]))[:4] + for perc in ['<0.5m', '<1m', '<2m']] + for key in self.METHODS] + + ale = [[str(self.dic_stats['test'][key + '_merged'][clst]['mean'])[:4] + ' (' + + str(self.dic_stats['test'][key][clst]['mean'])[:4] + ')' + for clst in self.CLUSTERS[:4]] + for key in self.METHODS] + + results = [[key] + alp[idx] + ale[idx] for idx, key in enumerate(self.METHODS)] + print(tabulate(results, headers=self.HEADERS)) + print('-'*90 + '\n') + + if self.verbose: + methods_all = list(chain.from_iterable((method, method + '_merged') for method in self.METHODS)) + for key in methods_all: + for clst in self.CLUSTERS[:4]: + print(" {} Average error in cluster {}: {:.2f} with a max error of {:.1f}, " + "for {} annotations" + .format(key, clst, self.dic_stats['test'][key][clst]['mean'], + self.dic_stats['test'][key][clst]['max'], + self.dic_stats['test'][key][clst]['cnt'])) + + if key == 'our': + print("% of annotations inside the confidence interval: {:.1f} %, " + "of which {:.1f} % at higher risk" + .format(100 * self.dic_stats['test'][key][clst]['interval'], + 100 * self.dic_stats['test'][key][clst]['at_risk'])) + + for perc in ['<0.5m', '<1m', '<2m']: + print("{} Instances with error {}: {:.2f} %" + .format(key, perc, 100 * average(self.errors[key][perc]))) + + print("\nMatched annotations: {:.1f} %".format(self.errors[key]['matched'])) + print(" Detected annotations : {}/{} ".format(self.dic_cnt[key], self.cnt_gt)) + print("-" * 100) + + print("\n Annotations inside the confidence interval: {:.1f} %" + .format(100 * self.dic_stats['test']['our']['all']['interval'])) + print("precision 1: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_1'])) + print("precision 2: {:.2f}".format(self.dic_stats['test']['our']['all']['prec_2'])) + if self.stereo: + print("Stereo error greater than mono: {:.1f} %" + .format(100 * self.cnt_stereo_error / self.dic_cnt['our_merged'])) def get_statistics(dic_stats, errors, dic_stds, key): """Update statistics of a cluster""" - dic_stats['mean'] = sum(errors) / float(len(errors)) - dic_stats['max'] = max(errors) - dic_stats['cnt'] = len(errors) + try: + dic_stats['mean'] = average(errors) + dic_stats['max'] = max(errors) + dic_stats['cnt'] = len(errors) + except (ZeroDivisionError, ValueError): + dic_stats['mean'] = 0. + dic_stats['max'] = 0. + dic_stats['cnt'] = 0. if key == 'our': - dic_stats['std_ale'] = sum(dic_stds['ale']) / float(len(dic_stds['ale'])) - dic_stats['std_epi'] = sum(dic_stds['epi']) / float(len(dic_stds['epi'])) - dic_stats['interval'] = sum(dic_stds['interval']) / float(len(dic_stds['interval'])) - dic_stats['at_risk'] = sum(dic_stds['at_risk']) / float(len(dic_stds['at_risk'])) - dic_stats['prec_1'] = sum(dic_stds['prec_1']) / float(len(dic_stds['prec_1'])) - dic_stats['prec_2'] = sum(dic_stds['prec_2']) / float(len(dic_stds['prec_2'])) + dic_stats['std_ale'] = average(dic_stds['ale']) + dic_stats['std_epi'] = average(dic_stds['epi']) + dic_stats['interval'] = average(dic_stds['interval']) + dic_stats['at_risk'] = average(dic_stds['at_risk']) + dic_stats['prec_1'] = average(dic_stds['prec_1']) + dic_stats['prec_2'] = average(dic_stds['prec_2']) def add_true_negatives(err, cnt_gt): @@ -379,3 +457,8 @@ def extract_indices(idx_to_check, *args): checks[idx_method] = True indices.append(idx_pred) return all(checks), indices + + +def average(my_list): + """calculate mean of a list""" + return sum(my_list) / len(my_list) diff --git a/monoloco/eval/generate_kitti.py b/monoloco/eval/generate_kitti.py new file mode 100644 index 0000000..dd3bcb2 --- /dev/null +++ b/monoloco/eval/generate_kitti.py @@ -0,0 +1,234 @@ +"""Run monoloco over all the pifpaf joints of KITTI images and extract and save the annotations in txt files""" + + +import math +import os +import glob +import json +import shutil +import itertools +import copy + +import numpy as np +import torch + +from ..predict.network import MonoLoco +from ..eval.geom_baseline import compute_distance +from ..utils.kitti import get_calibration +from ..utils.pifpaf import preprocess_pif +from ..utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera +from ..utils.stereo import depth_from_disparity + + +class GenerateKitti: + + def __init__(self, model, dir_ann, p_dropout=0.2, n_dropout=0): + + # Load monoloco + use_cuda = torch.cuda.is_available() + device = torch.device("cuda" if use_cuda else "cpu") + self.monoloco = MonoLoco(model_path=model, device=device, n_dropout=n_dropout, p_dropout=p_dropout) + self.dir_out = os.path.join('data', 'kitti', 'monoloco') + self.dir_ann = dir_ann + + # List of images + self.list_basename = factory_basename(dir_ann) + self.dir_kk = os.path.join('data', 'kitti', 'calib') + + def run_mono(self): + """Run Monoloco and save txt files for KITTI evaluation""" + + cnt_ann = cnt_file = cnt_no_file = 0 + dir_out = os.path.join('data', 'kitti', 'monoloco') + # Remove the output directory if already exists (avoid residual txt files) + if os.path.exists(dir_out): + shutil.rmtree(dir_out) + os.makedirs(dir_out) + print("\nCreated empty output directory for txt files") + + # Run monoloco over the list of images + for basename in self.list_basename: + path_calib = os.path.join(self.dir_kk, basename + '.txt') + annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename) + boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374)) + + if not keypoints: + cnt_no_file += 1 + continue + else: + # Run the network and the geometric baseline + outputs, varss = self.monoloco.forward(keypoints, kk) + dds_geom = eval_geometric(keypoints, kk, average_y=0.48) + + # Save the file + uv_centers = get_keypoints(keypoints, mode='bottom') # Kitti uses the bottom center to calculate depth + xy_centers = pixel_to_camera(uv_centers, kk, 1) + outputs = outputs.detach().cpu() + zzs = xyz_from_distance(outputs[:, 0:1], xy_centers)[:, 2].tolist() + all_outputs = [outputs.detach().cpu(), varss.detach().cpu(), dds_geom, zzs] + all_inputs = [boxes, xy_centers] + all_params = [kk, tt] + path_txt = os.path.join(dir_out, basename + '.txt') + save_txts(path_txt, all_inputs, all_outputs, all_params) + + # Update counting + cnt_ann += len(boxes) + cnt_file += 1 + print("Saved in {} txt {} annotations. Not found {} images\n".format(cnt_file, cnt_ann, cnt_no_file)) + + def run_stereo(self): + """Run monoloco on left and right images and calculate disparity if a match is found""" + + cnt_ann = cnt_file = cnt_no_file = cnt_no_stereo = cnt_disparity = 0 + dir_out = os.path.join('data', 'kitti', 'monoloco_stereo') + + # Remove the output directory if already exists (avoid residual txt files) + if os.path.exists(dir_out): + shutil.rmtree(dir_out) + os.makedirs(dir_out) + print("Created empty output directory for txt STEREO files") + + for basename in self.list_basename: + path_calib = os.path.join(self.dir_kk, basename + '.txt') + stereo = True + + for mode in ['left', 'right']: + annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename, mode=mode) + boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374)) + + if not keypoints and mode == 'left': + cnt_no_file += 1 + break + + elif not keypoints and mode == 'right': + stereo = False + + else: + # Run the network and the geometric baseline + outputs, varss = self.monoloco.forward(keypoints, kk) + dds_geom = eval_geometric(keypoints, kk, average_y=0.48) + + uv_centers = get_keypoints(keypoints, mode='bottom') # Kitti uses the bottom to calculate depth + xy_centers = pixel_to_camera(uv_centers, kk, 1) + + if mode == 'left': + outputs_l = outputs.detach().cpu() + varss_l = varss.detach().cpu() + zzs_l = xyz_from_distance(outputs_l[:, 0:1], xy_centers)[:, 2].tolist() + kps_l = copy.deepcopy(keypoints) + boxes_l = boxes + xy_centers_l = xy_centers + dds_geom_l = dds_geom + kk_l = kk + tt_l = tt + + else: + kps_r = copy.deepcopy(keypoints) + + if stereo: + zzs, cnt = depth_from_disparity(zzs_l, kps_l, kps_r) + cnt_disparity += cnt + else: + zzs = zzs_l + + # Save the file + all_outputs = [outputs_l, varss_l, dds_geom_l, zzs] + all_inputs = [boxes_l, xy_centers_l] + all_params = [kk_l, tt_l] + path_txt = os.path.join(dir_out, basename + '.txt') + save_txts(path_txt, all_inputs, all_outputs, all_params) + + # Update counting + cnt_ann += len(boxes_l) + cnt_file += 1 + + # Print statistics + print("Saved in {} txt {} annotations. Not found {} images."
+ .format(cnt_file, cnt_ann, cnt_no_file)) + print("Annotations corrected using stereo: {:.1f}%, not found {} stereo files" + .format(cnt_disparity / cnt_ann * 100, cnt_no_stereo)) + + +def save_txts(path_txt, all_inputs, all_outputs, all_params): + + outputs, varss, dds_geom, zzs = all_outputs[:] + uv_boxes, xy_centers = all_inputs[:] + kk, tt = all_params[:] + + with open(path_txt, "w+") as ff: + for idx in range(outputs.shape[0]): + + xx = float(xy_centers[idx][0]) * zzs[idx] + tt[0] + yy = float(xy_centers[idx][1]) * zzs[idx] + tt[1] + zz = zzs[idx] + tt[2] + dd = math.sqrt(xx ** 2 + yy ** 2 + zz ** 2) + cam_0 = [xx, yy, zz, dd] + + for el in uv_boxes[idx][:]: + ff.write("%s " % el) + for el in cam_0: + ff.write("%s " % el) + ff.write("%s " % float(outputs[idx][1])) + ff.write("%s " % float(varss[idx])) + ff.write("%s " % dds_geom[idx]) + ff.write("\n") + + # Save intrinsic matrix in the last row + for kk_el in itertools.chain(*kk): # Flatten a list of lists + ff.write("%f " % kk_el) + ff.write("\n") + + +def factory_basename(dir_ann): + """ Return all the basenames in the annotations folder""" + + list_ann = glob.glob(os.path.join(dir_ann, '*.json')) + list_basename = [os.path.basename(x).split('.')[0] for x in list_ann] + assert list_basename, " Missing json annotations file to create txt files for KITTI datasets" + return list_basename + + +def factory_file(path_calib, dir_ann, basename, mode='left'): + """Choose the annotation and the calibration files. Stereo option with ite = 1""" + + assert mode in ('left', 'right') + p_left, p_right = get_calibration(path_calib) + + if mode == 'left': + kk, tt = p_left[:] + path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json') + + else: + kk, tt = p_right[:] + path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json') + + try: + with open(path_ann, 'r') as f: + annotations = json.load(f) + except FileNotFoundError: + annotations = [] + + return annotations, kk, tt + + +def eval_geometric(keypoints, kk, average_y=0.48): + """ Evaluate geometric distance""" + + dds_geom = [] + + uv_centers = get_keypoints(keypoints, mode='center') + uv_shoulders = get_keypoints(keypoints, mode='shoulder') + uv_hips = get_keypoints(keypoints, mode='hip') + + xy_centers = pixel_to_camera(uv_centers, kk, 1) + xy_shoulders = pixel_to_camera(uv_shoulders, kk, 1) + xy_hips = pixel_to_camera(uv_hips, kk, 1) + + for idx, xy_center in enumerate(xy_centers): + zz = compute_distance(xy_shoulders[idx], xy_hips[idx], average_y) + xyz_center = np.array([xy_center[0], xy_center[1], zz]) + dd_geom = float(np.linalg.norm(xyz_center)) + dds_geom.append(dd_geom) + + return dds_geom diff --git a/src/eval/geom_baseline.py b/monoloco/eval/geom_baseline.py similarity index 94% rename from src/eval/geom_baseline.py rename to monoloco/eval/geom_baseline.py index cd7e6a8..8d45ce4 100644 --- a/src/eval/geom_baseline.py +++ b/monoloco/eval/geom_baseline.py @@ -6,12 +6,10 @@ from collections import defaultdict import numpy as np -from utils.camera import pixel_to_camera, get_keypoints +from ..utils.camera import pixel_to_camera, get_keypoints AVERAGE_Y = 0.48 CLUSTERS = ['10', '20', '30', 'all'] -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) def geometric_baseline(joints): @@ -30,6 +28,8 @@ def geometric_baseline(joints): 'right_ankle'] """ + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) cnt_tot = 0 dic_dist = defaultdict(lambda: defaultdict(list)) @@ -100,7 +100,7 @@ def compute_distance(xyz_norm_1, 
xyz_norm_2, average_y, mode='average', dy_met=0 1. knowing specific height of the annotation (head-ankle) dy_met 2. using mean height of people (average_y) """ - assert mode == 'average' or mode == 'real' + assert mode in ('average', 'real') x1 = float(xyz_norm_1[0]) y1 = float(xyz_norm_1[1]) @@ -115,13 +115,13 @@ def compute_distance(xyz_norm_1, xyz_norm_2, average_y, mode='average', dy_met=0 cc = -dy_met # Solving the linear system Ax = b - Aa = np.array([[y1, 0, -xx], - [0, -y1, 1], - [y2, 0, -xx], - [0, -y2, 1]]) + matrix = np.array([[y1, 0, -xx], + [0, -y1, 1], + [y2, 0, -xx], + [0, -y2, 1]]) bb = np.array([cc * xx, -cc, 0, 0]).reshape(4, 1) - xx = np.linalg.lstsq(Aa, bb, rcond=None) + xx = np.linalg.lstsq(matrix, bb, rcond=None) z_met = abs(np.float(xx[0][1])) # Abs take into account specularity behind the observer return z_met @@ -160,7 +160,7 @@ def calculate_heights(heights, mode): Compute statistics of heights based on the distance """ - assert mode == 'mean' or mode == 'std' or mode == 'max' + assert mode in ('mean', 'std', 'max') heights_fin = {} head_shoulder = np.array(heights['shoulder']) - np.array(heights['head']) @@ -193,4 +193,3 @@ def calculate_error(dic_errors): for clst in dic_errors: errors[clst] = np.float(np.mean(np.array(dic_errors[clst]))) return errors - diff --git a/monoloco/predict/__init__.py b/monoloco/predict/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/predict/factory.py b/monoloco/predict/factory.py similarity index 92% rename from src/predict/factory.py rename to monoloco/predict/factory.py index ae636d4..2f259d7 100644 --- a/src/predict/factory.py +++ b/monoloco/predict/factory.py @@ -2,7 +2,7 @@ import json import os from openpifpaf import show -from visuals.printer import Printer +from ..visuals.printer import Printer def factory_for_gt(im_size, name=None, path_gt=None): @@ -24,7 +24,7 @@ def factory_for_gt(im_size, name=None, path_gt=None): dic_gt = None x_factor = im_size[0] / 1600 y_factor = im_size[1] / 900 - pixel_factor = (x_factor + y_factor) / 2 + pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it if im_size[0] / im_size[1] > 2.5: kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration else: @@ -45,7 +45,7 @@ def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=N keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:] # Visualizer - keypoint_painter = show.KeypointPainter(show_box=True) + keypoint_painter = show.KeypointPainter(show_box=False) skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4) @@ -79,7 +79,8 @@ def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=N printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types , z_max=args.z_max, epistemic=epistemic) figures, axes = printer.factory_axes() - printer.draw(figures, axes, dic_out, images_outputs[1], save=True, show=args.show) + printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box, + save=True, show=args.show) if 'json' in args.output_types: with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff: diff --git a/src/predict/monoloco.py b/monoloco/predict/network.py similarity index 90% rename from src/predict/monoloco.py rename to monoloco/predict/network.py index c54b310..b2efd97 100644 --- a/src/predict/monoloco.py +++ b/monoloco/predict/network.py @@ -8,10 +8,10 @@ from collections import defaultdict import torch -from utils.iou import 
get_iou_matches, reorder_matches -from utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance -from utils.monoloco import get_monoloco_inputs, unnormalize_bi, laplace_sampling -from models.architectures import LinearModel +from ..utils.iou import get_iou_matches, reorder_matches +from ..utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance +from ..utils.network import get_monoloco_inputs, unnormalize_bi, laplace_sampling +from ..train.architectures import LinearModel class MonoLoco: @@ -64,7 +64,7 @@ class MonoLoco: return outputs, varss @staticmethod - def post_process(outputs, varss, boxes, keypoints, kk, dic_gt, iou_min=0.25): + def post_process(outputs, varss, boxes, keypoints, kk, dic_gt, iou_min=0.3): """Post process monoloco to output final dictionary with all information for visualizations""" dic_out = defaultdict(list) @@ -74,6 +74,7 @@ class MonoLoco: if dic_gt: boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds'] matches = get_iou_matches(boxes, boxes_gt, thresh=iou_min) + print("found {} matches with ground-truth".format(len(matches))) else: matches = [(idx, idx) for idx, _ in enumerate(boxes)] # Replicate boxes @@ -98,6 +99,7 @@ class MonoLoco: xyz_real = xyz_from_distance(dd_real, xy_centers[idx]) xyz_pred = xyz_from_distance(dd_pred, xy_centers[idx]) dic_out['boxes'].append(box) + dic_out['boxes_gt'].append(boxes_gt[idx_gt] if dic_gt else boxes[idx]) dic_out['dds_real'].append(dd_real) dic_out['dds_pred'].append(dd_pred) dic_out['stds_ale'].append(ale) diff --git a/src/predict/pifpaf.py b/monoloco/predict/pifpaf.py similarity index 99% rename from src/predict/pifpaf.py rename to monoloco/predict/pifpaf.py index 779a132..a4fe463 100644 --- a/src/predict/pifpaf.py +++ b/monoloco/predict/pifpaf.py @@ -107,4 +107,3 @@ class PifPaf: for kps in keypoint_sets ] return keypoint_sets, scores, pifpaf_out - diff --git a/src/predict/predict.py b/monoloco/predict/predict.py similarity index 93% rename from src/predict/predict.py rename to monoloco/predict/predict.py index b6e15ec..c91b150 100644 --- a/src/predict/predict.py +++ b/monoloco/predict/predict.py @@ -4,10 +4,10 @@ from PIL import Image import torch -from predict.pifpaf import PifPaf, ImageList -from predict.monoloco import MonoLoco -from predict.factory import factory_for_gt, factory_outputs -from utils.pifpaf import preprocess_pif +from ..predict.pifpaf import PifPaf, ImageList +from ..predict.network import MonoLoco +from ..predict.factory import factory_for_gt, factory_outputs +from ..utils.pifpaf import preprocess_pif def predict(args): diff --git a/monoloco/prep/__init__.py b/monoloco/prep/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/features/preprocess_ki.py b/monoloco/prep/preprocess_ki.py similarity index 73% rename from src/features/preprocess_ki.py rename to monoloco/prep/preprocess_ki.py index d9b8bbf..10d2b69 100644 --- a/src/features/preprocess_ki.py +++ b/monoloco/prep/preprocess_ki.py @@ -8,11 +8,12 @@ from collections import defaultdict import json import datetime -from utils.kitti import get_calibration, split_training, parse_ground_truth -from utils.monoloco import get_monoloco_inputs -from utils.pifpaf import preprocess_pif -from utils.iou import get_iou_matches -from utils.misc import append_cluster +from ..prep.transforms import transform_keypoints +from ..utils.kitti import get_calibration, split_training, parse_ground_truth +from ..utils.network import get_monoloco_inputs +from ..utils.pifpaf import preprocess_pif +from ..utils.iou import 
get_iou_matches +from ..utils.misc import append_cluster class PreprocessKitti: @@ -29,7 +30,7 @@ class PreprocessKitti: clst=defaultdict(lambda: defaultdict(list)))} dic_names = defaultdict(lambda: defaultdict(list)) - def __init__(self, dir_ann, iou_min=0.3): + def __init__(self, dir_ann, iou_min): self.dir_ann = dir_ann self.iou_min = iou_min @@ -52,10 +53,7 @@ class PreprocessKitti: def run(self): """Save json files""" - cnt_gt = 0 - cnt_files = 0 - cnt_files_ped = 0 - cnt_fnf = 0 + cnt_gt = cnt_files = cnt_files_ped = cnt_fnf = 0 dic_cnt = {'train': 0, 'val': 0, 'test': 0} for name in self.names_gt: @@ -73,10 +71,7 @@ class PreprocessKitti: kk = p_left[0] # Iterate over each line of the gt file and save box location and distances - if phase == 'train': - (boxes_gt, boxes_3d, dds_gt, _, _) = parse_ground_truth(path_gt, mode='gt_all') # Also cyclists - else: - (boxes_gt, boxes_3d, dds_gt, _, _) = parse_ground_truth(path_gt, mode='gt') # only pedestrians + boxes_gt, boxes_3d, dds_gt = parse_ground_truth(path_gt, category='all')[:3] self.dic_names[basename + '.png']['boxes'] = copy.deepcopy(boxes_gt) self.dic_names[basename + '.png']['dds'] = copy.deepcopy(dds_gt) @@ -90,7 +85,11 @@ class PreprocessKitti: with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f: annotations = json.load(f) boxes, keypoints = preprocess_pif(annotations, im_size=(1238, 374)) + keypoints_hflip = transform_keypoints(keypoints, mode='flip') inputs = get_monoloco_inputs(keypoints, kk).tolist() + inputs_hflip = get_monoloco_inputs(keypoints_hflip, kk).tolist() + all_keypoints = [keypoints, keypoints_hflip] + all_inputs = [inputs, inputs_hflip] except FileNotFoundError: boxes = [] @@ -98,13 +97,15 @@ class PreprocessKitti: # Match each set of keypoints with a ground truth matches = get_iou_matches(boxes, boxes_gt, self.iou_min) for (idx, idx_gt) in matches: - self.dic_jo[phase]['kps'].append(keypoints[idx]) - self.dic_jo[phase]['X'].append(inputs[idx]) - self.dic_jo[phase]['Y'].append([dds_gt[idx_gt]]) # Trick to make it (nn,1) - self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt]) - self.dic_jo[phase]['K'].append(kk) - self.dic_jo[phase]['names'].append(name) # One image name for each annotation - append_cluster(self.dic_jo, phase, inputs[idx], dds_gt[idx_gt], keypoints[idx]) + for nn, keypoints in enumerate(all_keypoints): + inputs = all_inputs[nn] + self.dic_jo[phase]['kps'].append(keypoints[idx]) + self.dic_jo[phase]['X'].append(inputs[idx]) + self.dic_jo[phase]['Y'].append([dds_gt[idx_gt]]) # Trick to make it (nn,1) + self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt]) + self.dic_jo[phase]['K'].append(kk) + self.dic_jo[phase]['names'].append(name) # One image name for each annotation + append_cluster(self.dic_jo, phase, inputs[idx], dds_gt[idx_gt], keypoints[idx]) dic_cnt[phase] += 1 with open(self.path_joints, 'w') as file: @@ -116,7 +117,8 @@ class PreprocessKitti: .format(dic_cnt[phase], phase)) print("Number of GT files: {}. Files with at least one pedestrian: {}.
Files not found: {}" .format(cnt_files, cnt_files_ped, cnt_fnf)) - print("Number of GT annotations: {}".format(cnt_gt)) + print("Matched : {:.1f} % of the ground truth instances" + .format(100 * (dic_cnt['train'] + dic_cnt['val']) / cnt_gt)) print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints)) def _factory_phase(self, name): diff --git a/src/features/preprocess_nu.py b/monoloco/prep/preprocess_nu.py similarity index 58% rename from src/features/preprocess_nu.py rename to monoloco/prep/preprocess_nu.py index e01a9ab..9137d2a 100644 --- a/src/features/preprocess_nu.py +++ b/monoloco/prep/preprocess_nu.py @@ -13,12 +13,13 @@ import numpy as np from nuscenes.nuscenes import NuScenes from nuscenes.utils import splits -from utils.iou import get_iou_matches -from utils.misc import append_cluster -from utils.nuscenes import select_categories -from utils.camera import project_3d -from utils.pifpaf import preprocess_pif -from utils.monoloco import get_monoloco_inputs + +from ..utils.iou import get_iou_matches +from ..utils.misc import append_cluster +from ..utils.nuscenes import select_categories +from ..utils.camera import project_3d +from ..utils.pifpaf import preprocess_pif +from ..utils.network import get_monoloco_inputs class PreprocessNuscenes: @@ -35,7 +36,7 @@ class PreprocessNuscenes: } dic_names = defaultdict(lambda: defaultdict(list)) - def __init__(self, dir_ann, dir_nuscenes, dataset, iou_min=0.3): + def __init__(self, dir_ann, dir_nuscenes, dataset, iou_min): logging.basicConfig(level=logging.INFO) self.logger = logging.getLogger(__name__) @@ -58,21 +59,13 @@ class PreprocessNuscenes: """ Prepare arrays for training """ - cnt_scenes = 0 - cnt_samples = 0 - cnt_sd = 0 - cnt_ann = 0 - + cnt_scenes = cnt_samples = cnt_sd = cnt_ann = 0 start = time.time() - for ii, scene in enumerate(self.scenes): end_scene = time.time() current_token = scene['first_sample_token'] cnt_scenes += 1 - if ii == 0: - time_left = "Nan" - else: - time_left = str((end_scene-start_scene)/60 * (len(self.scenes) - ii))[:4] + time_left = str((end_scene - start_scene) / 60 * (len(self.scenes) - ii))[:4] if ii != 0 else "NaN" sys.stdout.write('\r' + 'Elaborating scene {}, remaining time {} minutes' .format(cnt_scenes, time_left) + '\t\n') @@ -93,29 +86,9 @@ class PreprocessNuscenes: for cam in self.CAMERAS: sd_token = sample_dic['data'][cam] cnt_sd += 1 - path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1) # At least one corner - kk = kk.tolist() # Extract all the annotations of the person - boxes_gt = [] - dds = [] - boxes_3d = [] - name = os.path.basename(path_im) - for box_obj in boxes_obj: - if box_obj.name[:6] != 'animal': - general_name = box_obj.name.split('.')[0] + '.' 
+ box_obj.name.split('.')[1] - else: - general_name = 'animal' - if general_name in select_categories('all'): - box = project_3d(box_obj, kk) - dd = np.linalg.norm(box_obj.center) - boxes_gt.append(box) - dds.append(dd) - box_3d = box_obj.center.tolist() + box_obj.wlh.tolist() - boxes_3d.append(box_3d) - self.dic_names[name]['boxes'].append(box) - self.dic_names[name]['dds'].append(dd) - self.dic_names[name]['K'] = kk + name, boxes_gt, boxes_3d, dds, kk = self.extract_from_token(sd_token) # Run IoU with pifpaf detections and save path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json') @@ -124,23 +97,24 @@ class PreprocessNuscenes: if exists: with open(path_pif, 'r') as file: annotations = json.load(file) + boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900)) + else: + continue - boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900)) + if keypoints: + inputs = get_monoloco_inputs(keypoints, kk).tolist() - if keypoints: - inputs = get_monoloco_inputs(keypoints, kk).tolist() - - matches = get_iou_matches(boxes, boxes_gt, self.iou_min) - for (idx, idx_gt) in matches: - self.dic_jo[phase]['kps'].append(keypoints[idx]) - self.dic_jo[phase]['X'].append(inputs[idx]) - self.dic_jo[phase]['Y'].append([dds[idx_gt]]) # Trick to make it (nn,1) - self.dic_jo[phase]['names'].append(name) # One image name for each annotation - self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt]) - self.dic_jo[phase]['K'].append(kk) - append_cluster(self.dic_jo, phase, inputs[idx], dds[idx_gt], keypoints[idx]) - cnt_ann += 1 - sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t') + matches = get_iou_matches(boxes, boxes_gt, self.iou_min) + for (idx, idx_gt) in matches: + self.dic_jo[phase]['kps'].append(keypoints[idx]) + self.dic_jo[phase]['X'].append(inputs[idx]) + self.dic_jo[phase]['Y'].append([dds[idx_gt]]) # Trick to make it (nn,1) + self.dic_jo[phase]['names'].append(name) # One image name for each annotation + self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt]) + self.dic_jo[phase]['K'].append(kk) + append_cluster(self.dic_jo, phase, inputs[idx], dds[idx_gt], keypoints[idx]) + cnt_ann += 1 + sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t') current_token = sample_dic['next'] @@ -154,33 +128,55 @@ class PreprocessNuscenes: .format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60)) print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints)) + def extract_from_token(self, sd_token): + + boxes_gt = [] + dds = [] + boxes_3d = [] + path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1) # At least one corner + kk = kk.tolist() + name = os.path.basename(path_im) + for box_obj in boxes_obj: + if box_obj.name[:6] != 'animal': + general_name = box_obj.name.split('.')[0] + '.' 
+ box_obj.name.split('.')[1] + else: + general_name = 'animal' + if general_name in select_categories('all'): + box = project_3d(box_obj, kk) + dd = np.linalg.norm(box_obj.center) + boxes_gt.append(box) + dds.append(dd) + box_3d = box_obj.center.tolist() + box_obj.wlh.tolist() + boxes_3d.append(box_3d) + self.dic_names[name]['boxes'].append(box) + self.dic_names[name]['dds'].append(dd) + self.dic_names[name]['K'] = kk + + return name, boxes_gt, boxes_3d, dds, kk + def factory(dataset, dir_nuscenes): """Define dataset type and split training and validation""" assert dataset in ['nuscenes', 'nuscenes_mini', 'nuscenes_teaser'] - - if dataset == 'nuscenes': - nusc = NuScenes(version='v1.0-trainval', dataroot=dir_nuscenes, verbose=True) - scenes = nusc.scene - split_scenes = splits.create_splits_scenes() - split_train, split_val = split_scenes['train'], split_scenes['val'] - - elif dataset == 'nuscenes_mini': - nusc = NuScenes(version='v1.0-mini', dataroot=dir_nuscenes, verbose=True) - scenes = nusc.scene - split_scenes = splits.create_splits_scenes() - split_train, split_val = split_scenes['train'], split_scenes['val'] - + if dataset == 'nuscenes_mini': + version = 'v1.0-mini' else: - nusc = NuScenes(version='v1.0-trainval', dataroot=dir_nuscenes, verbose=True) + version = 'v1.0-trainval' + + nusc = NuScenes(version=version, dataroot=dir_nuscenes, verbose=True) + scenes = nusc.scene + + if dataset == 'nuscenes_teaser': with open("splits/nuscenes_teaser_scenes.txt", "r") as file: teaser_scenes = file.read().splitlines() - scenes = nusc.scene scenes = [scene for scene in scenes if scene['token'] in teaser_scenes] with open("splits/split_nuscenes_teaser.json", "r") as file: dic_split = json.load(file) split_train = [scene['name'] for scene in scenes if scene['token'] in dic_split['train']] split_val = [scene['name'] for scene in scenes if scene['token'] in dic_split['val']] + else: + split_scenes = splits.create_splits_scenes() + split_train, split_val = split_scenes['train'], split_scenes['val'] return nusc, scenes, split_train, split_val diff --git a/monoloco/prep/transforms.py b/monoloco/prep/transforms.py new file mode 100644 index 0000000..5e3bcf7 --- /dev/null +++ b/monoloco/prep/transforms.py @@ -0,0 +1,54 @@ + +import numpy as np + + +COCO_KEYPOINTS = [ + 'nose', # 1 + 'left_eye', # 2 + 'right_eye', # 3 + 'left_ear', # 4 + 'right_ear', # 5 + 'left_shoulder', # 6 + 'right_shoulder', # 7 + 'left_elbow', # 8 + 'right_elbow', # 9 + 'left_wrist', # 10 + 'right_wrist', # 11 + 'left_hip', # 12 + 'right_hip', # 13 + 'left_knee', # 14 + 'right_knee', # 15 + 'left_ankle', # 16 + 'right_ankle', # 17 +] + + +HFLIP = { + 'nose': 'nose', + 'left_eye': 'right_eye', + 'right_eye': 'left_eye', + 'left_ear': 'right_ear', + 'right_ear': 'left_ear', + 'left_shoulder': 'right_shoulder', + 'right_shoulder': 'left_shoulder', + 'left_elbow': 'right_elbow', + 'right_elbow': 'left_elbow', + 'left_wrist': 'right_wrist', + 'right_wrist': 'left_wrist', + 'left_hip': 'right_hip', + 'right_hip': 'left_hip', + 'left_knee': 'right_knee', + 'right_knee': 'left_knee', + 'left_ankle': 'right_ankle', + 'right_ankle': 'left_ankle', +} + + +def transform_keypoints(keypoints, mode): + + assert mode == 'flip', "mode not recognized" + kps = np.array(keypoints) + dic_kps = {key: kps[:, :, idx] for idx, key in enumerate(COCO_KEYPOINTS)} + kps_hflip = np.array([dic_kps[value] for key, value in HFLIP.items()]) + kps_hflip = np.transpose(kps_hflip, (1, 2, 0)) + return kps_hflip.tolist() diff --git a/src/main.py b/monoloco/run.py 
similarity index 84% rename from src/main.py rename to monoloco/run.py index ad04eaa..554d280 100644 --- a/src/main.py +++ b/monoloco/run.py @@ -1,21 +1,19 @@ +# pylint: skip-file import argparse -import os -import sys -sys.path.insert(0, os.path.join('.', 'features')) -sys.path.insert(0, os.path.join('.', 'models')) from openpifpaf.network import nets from openpifpaf import decoder -from features.preprocess_nu import PreprocessNuscenes -from features.preprocess_ki import PreprocessKitti -from predict.predict import predict -from models.trainer import Trainer -from eval.generate_kitti import generate_kitti -from eval.geom_baseline import geometric_baseline -from models.hyp_tuning import HypTuning -from eval.kitti_eval import KittiEval -from visuals.webcam import webcam + +from .prep.preprocess_nu import PreprocessNuscenes +from .prep.preprocess_ki import PreprocessKitti +from .predict.predict import predict +from .train.trainer import Trainer +from .eval.generate_kitti import GenerateKitti +from .eval.geom_baseline import geometric_baseline +from .train.hyp_tuning import HypTuning +from .eval.eval_kitti import EvalKitti +from .visuals.webcam import webcam def cli(): @@ -37,6 +35,7 @@ def cli(): default='nuscenes') prep_parser.add_argument('--dir_nuscenes', help='directory of nuscenes devkit', default='data/nuscenes/') + prep_parser.add_argument('--iou_min', help='minimum iou to match ground truth', type=float, default=0.3) # Predict (2D pose and/or 3D location from images) # General @@ -59,9 +58,9 @@ def cli(): default="data/models/monoloco-190513-1437.pkl") predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256) predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization', - default='data/arrays/names-kitti-190513-1754.json') + default='data/arrays/names-kitti-190710-1206.json') predict_parser.add_argument('--transform', help='transformation for the pose', default='None') - predict_parser.add_argument('--draw_kps', help='to draw kps in the images', action='store_true') + predict_parser.add_argument('--draw_box', help='to draw box in the images', action='store_true') predict_parser.add_argument('--predict', help='whether to make prediction', action='store_true') predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=22) predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0) @@ -87,7 +86,7 @@ def cli(): # Evaluation eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti') - eval_parser.add_argument('--geometric', help='to evaluate geometric distance', action='store_true') + eval_parser.add_argument('--geometric', help='to evaluate geometric distance', action='store_true') eval_parser.add_argument('--generate', help='create txt files for KITTI evaluation', action='store_true') eval_parser.add_argument('--dir_ann', help='directory of annotations of 2d joints (for KITTI evaluation)') eval_parser.add_argument('--model', help='path of MonoLoco model to load', required=True) @@ -96,7 +95,9 @@ def cli(): eval_parser.add_argument('--dropout', type=float, help='dropout.
Default no dropout', default=0.2) eval_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256) eval_parser.add_argument('--n_stage', type=int, help='Number of stages in the model', default=3) - eval_parser.add_argument('--show', help='whether to show eval statistics', action='store_true') + eval_parser.add_argument('--show', help='whether to show statistic graphs', action='store_true') + eval_parser.add_argument('--verbose', help='verbosity of statistics', action='store_true') + eval_parser.add_argument('--stereo', help='include stereo baseline results', action='store_true') args = parser.parse_args() return args @@ -113,10 +114,10 @@ def main(): elif args.command == 'prep': if 'nuscenes' in args.dataset: - prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset) + prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset, args.iou_min) prep.run() if 'kitti' in args.dataset: - prep = PreprocessKitti(args.dir_ann) + prep = PreprocessKitti(args.dir_ann, args.iou_min) prep.run() elif args.command == 'train': @@ -139,10 +140,13 @@ def main(): geometric_baseline(args.joints) if args.generate: - generate_kitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout) + kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout) + kitti_txt.run_mono() + if args.stereo: + kitti_txt.run_stereo() if args.dataset == 'kitti': - kitti_eval = KittiEval() + kitti_eval = EvalKitti(verbose=args.verbose, stereo=args.stereo) kitti_eval.run() kitti_eval.printer(show=args.show) diff --git a/monoloco/train/__init__.py b/monoloco/train/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/architectures.py b/monoloco/train/architectures.py similarity index 63% rename from src/models/architectures.py rename to monoloco/train/architectures.py index b1b5894..1dbfbb1 100644 --- a/src/models/architectures.py +++ b/monoloco/train/architectures.py @@ -3,47 +3,47 @@ import torch.nn as nn class TriLinear(nn.Module): - """ - As Bilinear but without skip connection - """ - def __init__(self, input_size, output_size, p_dropout, linear_size=1024): - super(TriLinear, self).__init__() + """ + As Bilinear but without skip connection + """ + def __init__(self, input_size, output_size, p_dropout, linear_size=1024): + super(TriLinear, self).__init__() - self.input_size = input_size - self.output_size = output_size - self.l_size = linear_size + self.input_size = input_size + self.output_size = output_size + self.l_size = linear_size - self.relu = nn.ReLU(inplace=True) - self.dropout = nn.Dropout(p_dropout) + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(p_dropout) - self.w1 = nn.Linear(self.input_size, self.l_size) - self.batch_norm1 = nn.BatchNorm1d(self.l_size) + self.w1 = nn.Linear(self.input_size, self.l_size) + self.batch_norm1 = nn.BatchNorm1d(self.l_size) - self.w2 = nn.Linear(self.l_size, self.l_size) - self.batch_norm2 = nn.BatchNorm1d(self.l_size) + self.w2 = nn.Linear(self.l_size, self.l_size) + self.batch_norm2 = nn.BatchNorm1d(self.l_size) - self.w3 = nn.Linear(self.l_size, self.output_size) + self.w3 = nn.Linear(self.l_size, self.output_size) - def forward(self, x): - y = self.w1(x) - y = self.batch_norm1(y) - y = self.relu(y) - y = self.dropout(y) + def forward(self, x): + y = self.w1(x) + y = self.batch_norm1(y) + y = self.relu(y) + y = self.dropout(y) - y = self.w2(y) - y = self.batch_norm2(y) - y = self.relu(y) - y = self.dropout(y) + y = 
self.w2(y) + y = self.batch_norm2(y) + y = self.relu(y) + y = self.dropout(y) - y = self.w3(y) + y = self.w3(y) - return y + return y -def weight_init(m): +def weight_init(batch): """TO initialize weights using kaiming initialization""" - if isinstance(m, nn.Linear): - nn.init.kaiming_normal_(m.weight) + if isinstance(batch, nn.Linear): + nn.init.kaiming_normal_(batch.weight) class Linear(nn.Module): @@ -93,7 +93,7 @@ class LinearModel(nn.Module): self.batch_norm1 = nn.BatchNorm1d(self.linear_size) self.linear_stages = [] - for l in range(num_stage): + for _ in range(num_stage): self.linear_stages.append(Linear(self.linear_size, self.p_dropout)) self.linear_stages = nn.ModuleList(self.linear_stages) @@ -109,11 +109,8 @@ class LinearModel(nn.Module): y = self.batch_norm1(y) y = self.relu(y) y = self.dropout(y) - # linear layers for i in range(self.num_stage): y = self.linear_stages[i](y) - y = self.w2(y) - - return y \ No newline at end of file + return y diff --git a/src/models/datasets.py b/monoloco/train/datasets.py similarity index 99% rename from src/models/datasets.py rename to monoloco/train/datasets.py index 56fce28..6a58655 100644 --- a/src/models/datasets.py +++ b/monoloco/train/datasets.py @@ -54,10 +54,3 @@ class KeypointsDataset(Dataset): count = len(self.dic_clst[clst]['Y']) return inputs, outputs, count - - - - - - - diff --git a/src/models/hyp_tuning.py b/monoloco/train/hyp_tuning.py similarity index 90% rename from src/models/hyp_tuning.py rename to monoloco/train/hyp_tuning.py index a704ab3..a0fc178 100644 --- a/src/models/hyp_tuning.py +++ b/monoloco/train/hyp_tuning.py @@ -1,13 +1,16 @@ + import math import os import json import time import logging -import torch import random import datetime + +import torch import numpy as np -from models.trainer import Trainer + +from .trainer import Trainer class HypTuning: @@ -30,12 +33,10 @@ class HypTuning: if not os.path.exists(dir_logs): os.makedirs(dir_logs) - now = datetime.datetime.now() - now_time = now.strftime("%Y%m%d-%H%M")[2:] name_out = 'hyp-baseline-' if baseline else 'hyp-monoloco-' - self.path_log = os.path.join(dir_logs, name_out + now_time) - self.path_model = os.path.join(dir_out, name_out + now_time + '.pkl') + self.path_log = os.path.join(dir_logs, name_out) + self.path_model = os.path.join(dir_out, name_out) logging.basicConfig(level=logging.INFO) self.logger = logging.getLogger(__name__) @@ -49,7 +50,7 @@ class HypTuning: random.shuffle(self.sched_step) self.bs_list = [64, 128, 256, 512, 1024, 2048] * multiplier random.shuffle(self.bs_list) - self.hidden_list = [128, 256, 512, 128, 256, 512] * multiplier + self.hidden_list = [256, 256, 256, 256, 256, 256] * multiplier random.shuffle(self.hidden_list) self.n_stage_list = [3, 3, 3, 3, 3, 3] * multiplier random.shuffle(self.n_stage_list) @@ -104,11 +105,14 @@ class HypTuning: dic_err_best = dic_err best_acc_val = acc_val model_best = model - torch.save(model_best.state_dict(), self.path_model) - - with open(self.path_log, 'w') as f: - json.dump(dic_best, f) + # Save model and log + now = datetime.datetime.now() + now_time = now.strftime("%Y%m%d-%H%M")[2:] + self.path_model = self.path_model + now_time + '.pkl' + torch.save(model_best.state_dict(), self.path_model) + with open(self.path_log + now_time, 'w') as f: + json.dump(dic_best, f) end = time.time() print('\n\n\n') self.logger.info(" Tried {} combinations".format(cnt)) diff --git a/src/models/losses.py b/monoloco/train/losses.py similarity index 95% rename from src/models/losses.py rename to 
monoloco/train/losses.py index 4572ada..83d896c 100644 --- a/src/models/losses.py +++ b/monoloco/train/losses.py @@ -52,8 +52,6 @@ class CustomL1Loss(torch.nn.Module): weights = torch.from_numpy(weights_np).float().to(self.device) # To make weights in the same cuda device losses = torch.abs(output - target) * weights loss = losses.mean() # Mean over the batch - # self.print_loss() - return loss @@ -66,7 +64,7 @@ class LaplacianLoss(torch.nn.Module): self.reduce = reduce self.evaluate = evaluate - def laplacian_1d(self, mu_si, xx): + def laplacian_1d(self, mu_si, xx): """ 1D Gaussian Loss. f(x | mu, sigma). The network outputs mu and sigma. X is the ground truth distance. This supports backward(). @@ -84,8 +82,7 @@ class LaplacianLoss(torch.nn.Module): if self.evaluate: return norm_bi - else: - return term_a + term_b + return term_a + term_b def forward(self, outputs, targets): @@ -109,13 +106,12 @@ class GaussianLoss(torch.nn.Module): self.evaluate = evaluate self.device = device - def gaussian_1d(self, mu_si, xx): + def gaussian_1d(self, mu_si, xx): """ 1D Gaussian Loss. f(x | mu, sigma). The network outputs mu and sigma. X is the ground truth distance. This supports backward(). Inspired by https://github.com/naba89/RNN-Handwriting-Generation-Pytorch/blob/master/loss_functions.py - """ mu, si = mu_si[:, 0:1], mu_si[:, 1:2] @@ -129,8 +125,8 @@ class GaussianLoss(torch.nn.Module): if self.evaluate: return norm_si - else: - return term_a + term_b + + return term_a + term_b def forward(self, outputs, targets): diff --git a/src/models/trainer.py b/monoloco/train/trainer.py similarity index 85% rename from src/models/trainer.py rename to monoloco/train/trainer.py index 711d69e..9b2b37f 100644 --- a/src/models/trainer.py +++ b/monoloco/train/trainer.py @@ -1,3 +1,9 @@ +# pylint: skip-file # TODO + +""" +Training and evaluation of a neural network which predicts 3D localization and confidence intervals +given 2d joints +""" import copy import os @@ -13,19 +19,14 @@ import torch.nn as nn from torch.utils.data import DataLoader from torch.optim import lr_scheduler -from models.datasets import KeypointsDataset -from models.architectures import LinearModel -from models.losses import LaplacianLoss -from utils.logs import set_logger -from utils.monoloco import epistemic_variance, laplace_sampling, unnormalize_bi +from .datasets import KeypointsDataset +from .architectures import LinearModel +from .losses import LaplacianLoss +from ..utils.logs import set_logger +from ..utils.network import laplace_sampling, unnormalize_bi class Trainer: - """ - Training and evaluation of a neural network which predicts 3D localization and confidence intervals - given 2d joints - """ - def __init__(self, joints, epochs=100, bs=256, dropout=0.2, lr=0.002, sched_step=20, sched_gamma=1, hidden_size=256, n_stage=3, r_seed=1, n_dropout=0, n_samples=100, baseline=False, save=False, print_loss=False): @@ -123,10 +124,7 @@ class Trainer: best_model_wts = copy.deepcopy(self.model.state_dict()) best_acc = 1e6 best_epoch = 0 - epoch_losses_tr = [] - epoch_losses_val = [] - epoch_norms = [] - epoch_sis = [] + epoch_losses_tr, epoch_losses_val, epoch_norms, epoch_sis = [], [], [], [] # four distinct lists, not aliases of one object for epoch in range(self.num_epochs): @@ -138,10 +136,7 @@ class Trainer: else: self.model.eval() # Set model to evaluate mode - running_loss_tr = 0.0 - running_loss_eval = 0.0 - norm_tr = 0.0 - bi_tr = 0.0 + running_loss_tr = running_loss_eval = norm_tr = bi_tr = 0.0 # Iterate over data. 
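A note on the Laplacian loss imported by this trainer (see the losses.py hunk above): `laplacian_1d` receives the network output `mu_si`, a predicted distance together with a spread, and returns `term_a + term_b`. The two terms are not shown in this diff, so the snippet below is only a minimal, self-contained sketch of a 1D Laplace negative log-likelihood of that shape; the clamping of the spread and its exact parameterization are assumptions, not the repository's code.

```
import torch

def laplace_nll(mu_b, xx):
    """Sketch of a Laplace NLL: mu_b is (m, 2) with predicted distance mu
    and spread b; xx is (m, 1) with ground-truth distances."""
    mu, bb = mu_b[:, 0:1], mu_b[:, 1:2]
    bb = torch.clamp(bb, min=1e-4)      # assumed guard to keep the spread positive
    term_a = torch.abs(xx - mu) / bb    # data term: absolute error scaled by the spread
    term_b = torch.log(2 * bb)          # normalization term: penalizes large spreads
    return (term_a + term_b).mean()

# A prediction centred on the target scores lower than a biased one
xx = torch.full((4, 1), 10.)
close = torch.cat((torch.full((4, 1), 10.1), torch.ones(4, 1)), 1)
far = torch.cat((torch.full((4, 1), 15.), torch.ones(4, 1)), 1)
assert laplace_nll(close, xx) < laplace_nll(far, xx)
```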
for inputs, labels, _, _ in self.dataloaders[phase]: @@ -156,10 +151,7 @@ class Trainer: with torch.set_grad_enabled(phase == 'train'): outputs = self.model(inputs) - if self.output_size == 2: - outputs_eval = outputs[:, 0:1] # Fundamental to put slices - else: - outputs_eval = outputs + outputs_eval = outputs[:, 0:1] if self.output_size == 2 else outputs loss = self.criterion(outputs, labels) loss_eval = self.criterion_eval(outputs_eval, labels) # L1 loss to evaluation @@ -196,7 +188,8 @@ class Trainer: time_elapsed = time.time() - since print('\n\n' + '-'*120) - self.logger.info('Training:\nTraining complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60)) + self.logger.info('Training:\nTraining complete in {:.0f}m {:.0f}s' + .format(time_elapsed // 60, time_elapsed % 60)) self.logger.info('Best validation Accuracy: {:.3f}'.format(best_acc)) self.logger.info('Saved weights of the model at epoch: {}'.format(best_epoch)) @@ -251,7 +244,7 @@ class Trainer: total_outputs = torch.empty((0, len(labels))).to(self.device) if self.n_dropout > 0: - for ii in range(self.n_dropout): + for _ in range(self.n_dropout): outputs = self.model(inputs) outputs = unnormalize_bi(outputs) samples = laplace_sampling(outputs, self.n_samples) @@ -269,8 +262,6 @@ class Trainer: if not self.baseline: outputs = unnormalize_bi(outputs) - avg_distance = float(self.criterion_eval(outputs[:, 0:1], labels).item()) - dic_err[phase]['all'] = self.compute_stats(outputs, labels, varss, dic_err[phase]['all'], size_eval) print('-'*120) @@ -323,26 +314,25 @@ class Trainer: if self.baseline: return (mean_mu, max_mu), (0, 0, 0) - else: - mean_bi = torch.mean(outputs[:, 1]).item() + mean_bi = torch.mean(outputs[:, 1]).item() - low_bound_bi = labels >= (outputs[:, 0] - outputs[:, 1]) - up_bound_bi = labels <= (outputs[:, 0] + outputs[:, 1]) - bools_bi = low_bound_bi & up_bound_bi - conf_bi = float(torch.sum(bools_bi)) / float(bools_bi.shape[0]) + low_bound_bi = labels >= (outputs[:, 0] - outputs[:, 1]) + up_bound_bi = labels <= (outputs[:, 0] + outputs[:, 1]) + bools_bi = low_bound_bi & up_bound_bi + conf_bi = float(torch.sum(bools_bi)) / float(bools_bi.shape[0]) - # if varss[0] >= 0: - # mean_var = torch.mean(varss).item() - # max_var = torch.max(varss).item() - # - # low_bound_var = labels >= (outputs[:, 0] - varss) - # up_bound_var = labels <= (outputs[:, 0] + varss) - # bools_var = low_bound_var & up_bound_var - # conf_var = float(torch.sum(bools_var)) / float(bools_var.shape[0]) + # if varss[0] >= 0: + # mean_var = torch.mean(varss).item() + # max_var = torch.max(varss).item() + # + # low_bound_var = labels >= (outputs[:, 0] - varss) + # up_bound_var = labels <= (outputs[:, 0] + varss) + # bools_var = low_bound_var & up_bound_var + # conf_var = float(torch.sum(bools_var)) / float(bools_var.shape[0]) - dic_err['mean'] += mean_mu * (outputs.size(0) / size_eval) - dic_err['bi'] += mean_bi * (outputs.size(0) / size_eval) - dic_err['count'] += (outputs.size(0) / size_eval) - dic_err['conf_bi'] += conf_bi * (outputs.size(0) / size_eval) + dic_err['mean'] += mean_mu * (outputs.size(0) / size_eval) + dic_err['bi'] += mean_bi * (outputs.size(0) / size_eval) + dic_err['count'] += (outputs.size(0) / size_eval) + dic_err['conf_bi'] += conf_bi * (outputs.size(0) / size_eval) - return dic_err + return dic_err diff --git a/monoloco/utils/__init__.py b/monoloco/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/camera.py b/monoloco/utils/camera.py similarity index 80% rename from 
src/utils/camera.py rename to monoloco/utils/camera.py index 619939f..51200f0 100644 --- a/src/utils/camera.py +++ b/monoloco/utils/camera.py @@ -10,9 +10,9 @@ def pixel_to_camera(uv_tensor, kk, z_met): It accepts lists or tensors of (m, 2) or (m, x, 2) or (m, 2, x) where x is the number of keypoints """ - if type(uv_tensor) == list: + if isinstance(uv_tensor, list): uv_tensor = torch.tensor(uv_tensor) - if type(kk) == list: + if isinstance(kk, list): kk = torch.tensor(kk) if uv_tensor.size()[-1] != 2: uv_tensor = uv_tensor.permute(0, 2, 1) # permute to have 2 as last dim to be padded @@ -42,7 +42,7 @@ def project_3d(box_obj, kk): box_2d = [] # Obtain the 3d points of the box xc, yc, zc = box_obj.center - ww, ll, hh, = box_obj.wlh + ww, _, hh, = box_obj.wlh # Points corresponding to a box at the z of the center x1 = xc - ww/2 @@ -70,7 +70,7 @@ def get_keypoints(keypoints, mode): Input --> list or torch.tensor [(m, 3, 17) or (3, 17)] Output --> torch.tensor [(m, 2)] """ - if type(keypoints) == list: + if isinstance(keypoints, list): keypoints = torch.tensor(keypoints) if len(keypoints.size()) == 2: # add batch dim keypoints = keypoints.unsqueeze(0) @@ -109,17 +109,15 @@ def get_keypoints(keypoints, mode): def transform_kp(kps, tr_mode): """Apply different transformations to the keypoints based on the tr_mode""" - assert tr_mode == "None" or tr_mode == "singularity" or tr_mode == "upper" or tr_mode == "lower" \ - or tr_mode == "horizontal" or tr_mode == "vertical" or tr_mode == "lateral" \ - or tr_mode == 'shoulder' or tr_mode == 'knee' or tr_mode == 'upside' or tr_mode == 'falling' \ - or tr_mode == 'random' + assert tr_mode in ("None", "singularity", "upper", "lower", "horizontal", "vertical", "lateral", + 'shoulder', 'knee', 'upside', 'falling', 'random') uu_c, vv_c = get_keypoints(kps, mode='center') if tr_mode == "None": return kps - elif tr_mode == "singularity": + if tr_mode == "singularity": uus = [uu_c for uu in kps[0]] vvs = [vv_c for vv in kps[1]] @@ -131,23 +129,6 @@ def transform_kp(kps, tr_mode): uus = kps[0] vvs = [vv_c for vv in kps[1]] - elif tr_mode == 'lower': - uus = kps[0] - vvs = kps[1][:9] + [vv_c for vv in kps[1][9:]] - - elif tr_mode == 'upper': - uus = kps[0] - vvs = [vv_c for vv in kps[1][:9]] + kps[1][9:] - - elif tr_mode == 'lateral': - uus = [] - for idx, kp in enumerate(kps[0]): - if idx % 2 == 1: - uus.append(kp) - else: - uus.append(uu_c) - vvs = kps[1] - elif tr_mode == 'shoulder': uus = kps[0] vvs = kps[1][:7] + [kps[1][6] for vv in kps[1][7:]] @@ -183,7 +164,7 @@ def xyz_from_distance(distances, xy_centers): xy_centers --> tensor(m,3) or (3) """ - if type(distances) == float: + if isinstance(distances, float): distances = torch.tensor(distances).unsqueeze(0) if len(distances.size()) == 1: distances = distances.unsqueeze(1) @@ -193,16 +174,3 @@ def xyz_from_distance(distances, xy_centers): assert xy_centers.size()[-1] == 3 and distances.size()[-1] == 1, "Size of tensor not recognized" return xy_centers * distances / torch.sqrt(1 + xy_centers[:, 0:1].pow(2) + xy_centers[:, 1:2].pow(2)) - - -def pixel_to_camera_old(uv1, kk, z_met): - """ - (3,) array --> (3,) array - Convert a point in pixel coordinate to absolute camera coordinates - """ - if len(uv1) == 2: - uv1.append(1) - kk_1 = np.linalg.inv(kk) - xyz_met_norm = np.dot(kk_1, uv1) - xyz_met = xyz_met_norm * z_met - return xyz_met diff --git a/src/utils/iou.py b/monoloco/utils/iou.py similarity index 99% rename from src/utils/iou.py rename to monoloco/utils/iou.py index 15235c9..2c492a4 100644 --- 
a/src/utils/iou.py +++ b/monoloco/utils/iou.py @@ -68,5 +68,3 @@ def reorder_matches(matches, boxes, mode='left_rigth'): matches_left = [idx for (idx, _) in matches] return [matches[matches_left.index(idx_boxes)] for idx_boxes in ordered_boxes if idx_boxes in matches_left] - - diff --git a/src/utils/kitti.py b/monoloco/utils/kitti.py similarity index 81% rename from src/utils/kitti.py rename to monoloco/utils/kitti.py index 40d3420..224509a 100644 --- a/src/utils/kitti.py +++ b/monoloco/utils/kitti.py @@ -1,6 +1,7 @@ +import math + import numpy as np -import math def get_calibration(path_txt): @@ -69,28 +70,27 @@ def get_simplified_calibration(path_txt): raise ValueError('Matrix K_02 not found in the file') -def check_conditions(line, mode, thresh=0.3): +def check_conditions(line, category, method, thresh=0.3): """Check conditions of our or m3d txt file""" check = False - assert mode in ['gt', 'gt_all', 'm3d', '3dop','our'], "Mode %r not recognized" % mode + assert method in ['gt', 'm3d', '3dop', 'our'], "Method %r not recognized" % method + assert category in ['pedestrian', 'cyclist', 'all'] - if mode == 'm3d' or mode == '3dop': + if method in ('m3d', '3dop'): conf = line.split()[15] - if line[:10] == 'pedestrian' and float(conf) >= thresh: + if line.split()[0] == category and float(conf) >= thresh: check = True - elif mode == 'gt': - # if line[:10] == 'Pedestrian' or line[:10] == 'Person_sit': - if line[:10] == 'Pedestrian': + elif method == 'gt': + if category == 'all': + categories_gt = ['Pedestrian', 'Person_sitting', 'Cyclist'] + else: + categories_gt = [category.upper()[0] + category[1:]] # Upper case names + if line.split()[0] in categories_gt: check = True - # Consider also person sitting and cyclists categories - elif mode == 'gt_all': - if line[:10] == 'Pedestrian' or line[:10] == 'Person_sit' or line[:7] == 'Cyclist': - check = True - - elif mode == 'our': + elif method == 'our': if line[4] >= thresh: check = True @@ -130,23 +130,25 @@ def split_training(names_gt, path_train, path_val): return set_train, set_val -def parse_ground_truth(path_gt, mode='gt'): +def parse_ground_truth(path_gt, category): """Parse KITTI ground truth files""" boxes_gt = [] dds_gt = [] + zzs_gt = [] truncs_gt = [] # Float from 0 to 1 occs_gt = [] # Either 0,1,2,3 fully visible, partly occluded, largely occluded, unknown boxes_3d = [] with open(path_gt, "r") as f_gt: for line_gt in f_gt: - if check_conditions(line_gt, mode=mode): + if check_conditions(line_gt, category, method='gt'): truncs_gt.append(float(line_gt.split()[1])) occs_gt.append(int(line_gt.split()[2])) boxes_gt.append([float(x) for x in line_gt.split()[4:8]]) loc_gt = [float(x) for x in line_gt.split()[11:14]] wlh = [float(x) for x in line_gt.split()[8:11]] boxes_3d.append(loc_gt + wlh) + zzs_gt.append(loc_gt[2]) dds_gt.append(math.sqrt(loc_gt[0] ** 2 + loc_gt[1] ** 2 + loc_gt[2] ** 2)) - return boxes_gt, boxes_3d, dds_gt, truncs_gt, occs_gt + return boxes_gt, boxes_3d, dds_gt, zzs_gt, truncs_gt, occs_gt diff --git a/src/utils/logs.py b/monoloco/utils/logs.py similarity index 100% rename from src/utils/logs.py rename to monoloco/utils/logs.py diff --git a/src/utils/misc.py b/monoloco/utils/misc.py similarity index 64% rename from src/utils/misc.py rename to monoloco/utils/misc.py index bf567e7..b2ffd4d 100644 --- a/src/utils/misc.py +++ b/monoloco/utils/misc.py @@ -1,4 +1,6 @@ +import random + def append_cluster(dic_jo, phase, xx, dd, kps): """Append the annotation based on its distance""" @@ -24,11 +26,21 @@ def append_cluster(dic_jo, 
phase, xx, dd, kps): dic_jo[phase]['clst']['>30']['Y'].append([dd]) -def get_task_error(dd): +def get_task_error(dd, mode='std'): """Get target error not knowing the gender""" - mm_gender = 0.0556 + assert mode in ('std', 'mad') + if mode == 'std': + mm_gender = 0.0557 + elif mode == 'mad': # mean absolute deviation + mm_gender = 0.0457 return mm_gender * dd +def get_pixel_error(dd_gt, zz_gt): + """calculate error in stereo distance due to +-1 pixel mismatch (function of depth)""" - + disp = 0.54 * 721 / zz_gt + random.seed(1) + sign = random.choice((-1, 1)) + delta_z = zz_gt - 0.54 * 721 / (disp + sign) + return dd_gt + delta_z diff --git a/src/utils/monoloco.py b/monoloco/utils/network.py similarity index 90% rename from src/utils/monoloco.py rename to monoloco/utils/network.py index ac6211d..14b6507 100644 --- a/src/utils/monoloco.py +++ b/monoloco/utils/network.py @@ -1,7 +1,7 @@ import numpy as np import torch -from utils.camera import get_keypoints, pixel_to_camera +from ..utils.camera import get_keypoints, pixel_to_camera def get_monoloco_inputs(keypoints, kk): @@ -16,8 +16,9 @@ def get_monoloco_inputs(keypoints, kk): kk = torch.tensor(kk) # Projection in normalized image coordinates and zero-center with the center of the bounding box uv_center = get_keypoints(keypoints, mode='center') - xy1_center = pixel_to_camera(uv_center, kk, 1) * 10 - xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 1) * 10 + xy1_center = pixel_to_camera(uv_center, kk, 10) + xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10) + # xy1_center[:, 1].fill_(0) #TODO kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3) kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view return kps_out diff --git a/src/utils/nuscenes.py b/monoloco/utils/nuscenes.py similarity index 88% rename from src/utils/nuscenes.py rename to monoloco/utils/nuscenes.py index 8217fda..90fe7b3 100644 --- a/src/utils/nuscenes.py +++ b/monoloco/utils/nuscenes.py @@ -23,7 +23,7 @@ def get_unique_tokens(list_fin): return list_token_scene -def split_scenes(list_token_scene, tr, val, dir_main, save=False, load=True): +def split_scenes(list_token_scene, train, val, dir_main, save=False, load=True): """ Split the list according tr, val percentages (test percentage is a consequence) after shuffling the order """ @@ -34,7 +34,7 @@ def split_scenes(list_token_scene, tr, val, dir_main, save=False, load=True): random.seed(1) random.shuffle(list_token_scene) # it shuffles in place n_scenes = len(list_token_scene) - n_train = round(n_scenes * tr / 100) + n_train = round(n_scenes * train / 100) n_val = round(n_scenes * val / 100) list_train = list_token_scene[0: n_train] list_val = list_token_scene[n_train: n_train + n_val] @@ -55,18 +55,17 @@ def select_categories(cat): """ Choose the categories to extract annotations from """ - assert cat == 'person' or cat == 'all' or cat == 'car' + assert cat in ['person', 'all', 'car', 'cyclist'] if cat == 'person': categories = ['human.pedestrian'] - elif cat == 'all': - categories = ['human.pedestrian', - 'vehicle.bicycle', 'vehicle.motorcycle'] - + elif cat == 'all': + categories = ['human.pedestrian', 'vehicle.bicycle', 'vehicle.motorcycle'] + elif cat == 'cyclist': + categories = ['vehicle.bicycle'] elif cat == 'car': categories = ['vehicle'] - return categories diff --git a/monoloco/utils/pifpaf.py b/monoloco/utils/pifpaf.py new file mode 100644 index 0000000..01e01b0 --- /dev/null +++ b/monoloco/utils/pifpaf.py @@ -0,0 +1,54 @@ + +import numpy as np + + +def 
preprocess_pif(annotations, im_size=None): + """ + Preprocess pif annotations: + 1. Enlarge the box + 2. Constrain it inside the image (if im_size is provided) + """ + + boxes = [] + keypoints = [] + + for dic in annotations: + box = dic['bbox'] + if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0) + return [], [] + + kps = prepare_pif_kps(dic['keypoints']) + conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints + + # Add ~15% of the height (y) and ~30% of the width (x) on each side + delta_h = (box[3] - box[1]) / 7 + delta_w = (box[2] - box[0]) / 3.5 + assert delta_h > -5 and delta_w > -5, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + + # Put the box inside the image + if im_size is not None: + box[0] = max(0, box[0]) + box[1] = max(0, box[1]) + box[2] = min(box[2], im_size[0]) + box[3] = min(box[3], im_size[1]) + + box.append(conf) + boxes.append(box) + keypoints.append(kps) + + return boxes, keypoints + + +def prepare_pif_kps(kps_in): + """Convert from a list of 51 to a list of 3, 17""" + + assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3" + xxs = kps_in[0:][::3] + yys = kps_in[1:][::3] # from offset 1 every 3 + ccs = kps_in[2:][::3] + + return [xxs, yys, ccs] diff --git a/monoloco/utils/stereo.py b/monoloco/utils/stereo.py new file mode 100644 index 0000000..2045289 --- /dev/null +++ b/monoloco/utils/stereo.py @@ -0,0 +1,87 @@ + +import copy +import warnings + +import numpy as np + + +def depth_from_disparity(zzs, kps, kps_right): + """Associate instances in left and right images and compute disparity""" + zzs_stereo = [] + zzs = np.array(zzs) + kps = np.array(kps) + kps_right_list = copy.deepcopy(kps_right) + cnt_stereo = 0 + expected_disps = 0.54 * 721 / np.array(zzs) + + for idx, zz_mono in enumerate(zzs): + if kps_right_list: + + zz_stereo, disparity_x, disparity_y, idx_min = filter_disparities(kps, kps_right_list, idx, expected_disps) + + if verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y): + zzs_stereo.append(zz_stereo) + cnt_stereo += 1 + kps_right_list.pop(idx_min) + else: + zzs_stereo.append(zz_mono) + else: + zzs_stereo.append(zz_mono) + + return zzs_stereo, cnt_stereo + + +def filter_disparities(kps, kps_right_list, idx, expected_disps): + """filter joints based on confidence and interquartile range of the distribution""" + + CONF_MIN = 0.3 + kps_right = np.array(kps_right_list) + with warnings.catch_warnings(), np.errstate(invalid='ignore'): # enter both context managers ('and' would keep only the second) + try: + disparity_x = kps[idx, 0, :] - kps_right[:, 0, :] + disparity_y = kps[idx, 1, :] - kps_right[:, 1, :] + + # Mask for low confidence + mask_conf_left = kps[idx, 2, :] > CONF_MIN + mask_conf_right = kps_right[:, 2, :] > CONF_MIN + mask_conf = mask_conf_left & mask_conf_right + disparity_x_conf = np.where(mask_conf, disparity_x, np.nan) + disparity_y_conf = np.where(mask_conf, disparity_y, np.nan) + + # Mask outliers using iqr + mask_outlier = get_iqr_mask(disparity_x_conf) + disparity_x_mask = np.where(mask_outlier, disparity_x_conf, np.nan) + disparity_y_mask = np.where(mask_outlier, disparity_y_conf, np.nan) + avg_disparity_x = np.nanmedian(disparity_x_mask, axis=1) # ignore the nan + diffs_x = [abs(expected_disps[idx] - real) for real in avg_disparity_x] + idx_min = diffs_x.index(min(diffs_x)) + zz_stereo = 0.54 * 721. 
/ float(avg_disparity_x[idx_min]) + + except ZeroDivisionError: + zz_stereo = - 100 + + return zz_stereo, disparity_x_mask[idx_min], disparity_y_mask[idx_min], idx_min + + +def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y): + + COV_MIN = 0.1 + y_max_difference = (50 / zz_mono) + z_max_difference = 0.6 * zz_mono + + cov = float(np.nanstd(disparity_x) / np.abs(np.nanmean(disparity_x))) # Coefficient of variation + avg_disparity_y = np.nanmedian(disparity_y) + + if abs(zz_stereo - zz_mono) < z_max_difference and \ + avg_disparity_y < y_max_difference and \ + cov < COV_MIN: + return True + return False + + +def get_iqr_mask(distribution): + quartile_1, quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1) + iqr = quartile_3 - quartile_1 + lower_bound = quartile_1 - (iqr * 1.5) + upper_bound = quartile_3 + (iqr * 1.5) + return (distribution < upper_bound.reshape(-1, 1)) & (distribution > lower_bound.reshape(-1, 1)) diff --git a/monoloco/visuals/__init__.py b/monoloco/visuals/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/visuals/paper.py b/monoloco/visuals/paper.py similarity index 74% rename from src/visuals/paper.py rename to monoloco/visuals/paper.py index 01b6cf4..1e1c17b 100644 --- a/src/visuals/paper.py +++ b/monoloco/visuals/paper.py @@ -1,15 +1,15 @@ +# pylint: skip-file import numpy as np import os +import math import matplotlib.pyplot as plt from matplotlib.patches import Ellipse -from visuals.printer import get_angle -from visuals.printer import get_confidence +from .printer import get_angle def paper(): """Print paper figures""" - dir_out = os.path.join('data', 'all_images', 'paper') method = True task_error = True @@ -75,7 +75,7 @@ def paper(): plt.yticks([]) plt.xlabel('X [m]') plt.ylabel('Z [m]') - plt.savefig(os.path.join(dir_out, fig_name)) + # plt.savefig(os.path.join('docs', fig_name)) plt.show() plt.close() @@ -107,7 +107,7 @@ def paper(): plt.xlabel("Distance from the camera [m]") plt.ylabel("Localization error due to human height variation [m]") plt.legend(loc=(0.01, 0.55)) # Location from 0 to 1 from lower left - plt.savefig(os.path.join(dir_out, fig_name)) + # plt.savefig(os.path.join(dir_out, fig_name)) plt.show() plt.close() @@ -121,11 +121,21 @@ def gmm(): std_men = 7 mu_women = 165 std_women = 7 - N_men = np.random.normal(mu_men, std_men, 100000) - N_women = np.random.normal(mu_women, std_women, 100000) - N_gmm = np.concatenate((N_men, N_women)) - mu_gmm = np.mean(N_gmm) - std_gmm = np.std(N_gmm) + N_men_1 = np.random.normal(mu_men, std_men, 1000000) + N_men_2 = np.random.normal(mu_men, std_men, 1000000) + N_women_1 = np.random.normal(mu_women, std_women, 1000000) + N_women_2 = np.random.normal(mu_women, std_women, 1000000) + N_gmm_1 = np.concatenate((N_men_1, N_women_1)) + N_gmm_2 = np.concatenate((N_men_2, N_women_2)) + mu_gmm_1 = np.mean(N_gmm_1) + mu_gmm_2 = np.mean(N_gmm_2) + std_gmm = np.std(N_gmm_1) + mm_gender = std_gmm / mu_gmm_1 + var_gmm = np.var(N_gmm_1) + abs_diff_1 = np.abs(mu_gmm_1 - N_gmm_1) + abs_diff_2 = np.mean(np.abs(N_gmm_1 - N_gmm_2)) + mean_deviation_1 = np.mean(abs_diff_1) + mean_deviation_2 = np.mean(abs_diff_2) # sns.distplot(N_men, hist=False, rug=False, label="Men") # sns.distplot(N_women, hist=False, rug=False, label="Women") # sns.distplot(N_gmm, hist=False, rug=False, label="GMM") @@ -133,7 +143,21 @@ # plt.ylabel("Height distributions of men and women") # plt.legend() # plt.show() - print("Variace of GMM distribution: {:.2f}".format(std_gmm)) - mm_gender = std_gmm / mu_gmm + print("Mean of GMM distribution: 
{:.2f}".format(mu_gmm_1)) + print("Standard deviation: {:.2f}".format(std_gmm)) + print("Relative error (standard deviation) {:.3f} %".format(mm_gender * 100)) + print("Variance: {:.2f}".format(var_gmm)) + print("Mean deviation: {:.2f}".format(mean_deviation_1)) + print("Mean deviation 2: {:.2f}".format(mean_deviation_2)) + print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation_1 / mu_gmm_1) * 100)) - return mm_gender \ No newline at end of file + return mm_gender + + +def get_confidence(xx, zz, std): + + theta = math.atan2(zz, xx) + + delta_x = std * math.cos(theta) + delta_z = std * math.sin(theta) + return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z) \ No newline at end of file diff --git a/monoloco/visuals/printer.py b/monoloco/visuals/printer.py new file mode 100644 index 0000000..02f4363 --- /dev/null +++ b/monoloco/visuals/printer.py @@ -0,0 +1,278 @@ + +import math +from collections import OrderedDict + +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import matplotlib.cm as cm +from matplotlib.patches import Ellipse, Circle, Rectangle +from mpl_toolkits.axes_grid1 import make_axes_locatable + +from ..utils.camera import pixel_to_camera +from ..utils.misc import get_task_error + + +class Printer: + """ + Print results on images: birds eye view and computed distance + """ + FONTSIZE_BV = 16 + FONTSIZE = 18 + TEXTCOLOR = 'darkorange' + COLOR_KPS = 'yellow' + + def __init__(self, image, output_path, kk, output_types, epistemic=False, z_max=30, fig_width=10): + + self.im = image + self.kk = kk + self.output_types = output_types + self.epistemic = epistemic + self.z_max = z_max # To include ellipses in the image + self.y_scale = 1 + self.width = self.im.size[0] + self.height = self.im.size[1] + self.fig_width = fig_width + + # Define the output dir + self.path_out = output_path + self.cmap = cm.get_cmap('jet') + self.extensions = [] + + # Define variables of the class to change for every image + self.mpl_im0 = self.stds_ale = self.stds_epi = self.xx_gt = self.zz_gt = self.xx_pred = self.zz_pred =\ + self.dds_real = self.uv_centers = self.uv_shoulders = self.uv_kps = self.boxes = self.boxes_gt = \ + self.uv_camera = self.radius = None + + def _process_results(self, dic_ann): + # Include the vectors inside the interval given by z_max + self.stds_ale = dic_ann['stds_ale'] + self.stds_epi = dic_ann['stds_epi'] + self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']] + self.zz_gt = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0 + for idx, xx in enumerate(dic_ann['xyz_real'])] + self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']] + self.zz_pred = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0 + for idx, xx in enumerate(dic_ann['xyz_pred'])] + self.dds_real = dic_ann['dds_real'] + self.uv_centers = dic_ann['uv_centers'] + self.uv_shoulders = dic_ann['uv_shoulders'] + self.uv_kps = dic_ann['uv_kps'] + self.boxes = dic_ann['boxes'] + self.boxes_gt = dic_ann['boxes_gt'] + + self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1]) + self.radius = 11 / 1600 * self.width + + def factory_axes(self): + """Create axes for figures: front bird combined""" + axes = [] + figures = [] + + # Initialize combined figure, resizing it for aesthetic proportions + if 'combined' in self.output_types: + assert 'bird' not in self.output_types and 'front' not in self.output_types, \ + "combined figure cannot be printed together with front or bird ones" + + self.y_scale = self.width / (self.height * 1.8) # Defined proportion + if self.y_scale < 0.95 or 
self.y_scale > 1.05: # allows more variation without resizing + self.im = self.im.resize((self.width, round(self.height * self.y_scale))) + self.width = self.im.size[0] + self.height = self.im.size[1] + fig_width = self.fig_width + 0.6 * self.fig_width + fig_height = self.fig_width * self.height / self.width + + # Distinguish between KITTI images and general images + fig_ar_1 = 1.7 if self.y_scale > 1.7 else 1.3 + width_ratio = 1.9 + self.extensions.append('.combined.png') + + fig, (ax1, ax0) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [1, width_ratio]}, + figsize=(fig_width, fig_height)) + ax1.set_aspect(fig_ar_1) + fig.set_tight_layout(True) + fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02) + + figures.append(fig) + assert 'front' not in self.output_types and 'bird' not in self.output_types, \ + "--combined arguments is not supported with other visualizations" + + # Initialize front figure + elif 'front' in self.output_types: + width = self.fig_width + height = self.fig_width * self.height / self.width + self.extensions.append(".front.png") + plt.figure(0) + fig0, ax0 = plt.subplots(1, 1, figsize=(width, height)) + fig0.set_tight_layout(True) + figures.append(fig0) + + # Create front figure axis + if any(xx in self.output_types for xx in ['front', 'combined']): + ax0 = self.set_axes(ax0, axis=0) + + divider = make_axes_locatable(ax0) + cax = divider.append_axes('right', size='3%', pad=0.05) + bar_ticks = self.z_max // 5 + 1 + norm = matplotlib.colors.Normalize(vmin=0, vmax=self.z_max) + scalar_mappable = plt.cm.ScalarMappable(cmap=self.cmap, norm=norm) + scalar_mappable.set_array([]) + plt.colorbar(scalar_mappable, ticks=np.linspace(0, self.z_max, bar_ticks), + boundaries=np.arange(- 0.05, self.z_max + 0.1, .1), cax=cax, label='Z [m]') + + axes.append(ax0) + if not axes: + axes.append(None) + + # Initialize bird-eye-view figure + if 'bird' in self.output_types: + self.extensions.append(".bird.png") + fig1, ax1 = plt.subplots(1, 1) + fig1.set_tight_layout(True) + figures.append(fig1) + if any(xx in self.output_types for xx in ['bird', 'combined']): + ax1 = self.set_axes(ax1, axis=1) # Adding field of view + axes.append(ax1) + return figures, axes + + def draw(self, figures, axes, dic_out, image, draw_text=True, legend=True, draw_box=False, + save=False, show=False): + + # Process the annotation dictionary of monoloco + self._process_results(dic_out) + + # Draw the front figure + num = 0 + self.mpl_im0.set_data(image) + for idx, uv in enumerate(self.uv_shoulders): + if any(xx in self.output_types for xx in ['front', 'combined']) and \ + min(self.zz_pred[idx], self.zz_gt[idx]) > 0: + + color = self.cmap((self.zz_pred[idx] % self.z_max) / self.z_max) + self.draw_circle(axes, uv, color) + if draw_box: + self.draw_boxes(axes, idx, color) + + if draw_text: + self.draw_text_front(axes, uv, num) + num += 1 + + # Draw the bird figure + num = 0 + for idx, _ in enumerate(self.xx_pred): + if any(xx in self.output_types for xx in ['bird', 'combined']) and self.zz_gt[idx] > 0: + + # Draw ground truth and predicted ellipses + self.draw_ellipses(axes, idx) + + # Draw bird eye view text + if draw_text: + self.draw_text_bird(axes, idx, num) + num += 1 + # Add the legend + if legend: + draw_legend(axes) + + # Draw, save or/and show the figures + for idx, fig in enumerate(figures): + fig.canvas.draw() + if save: + fig.savefig(self.path_out + self.extensions[idx], bbox_inches='tight') + if show: + fig.show() + + def draw_ellipses(self, axes, idx): + 
"""draw uncertainty ellipses""" + target = get_task_error(self.dds_real[idx]) + angle_gt = get_angle(self.xx_gt[idx], self.zz_gt[idx]) + ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1, + angle=angle_gt, color='lightgreen', fill=True, label="Task error") + axes[1].add_patch(ellipse_real) + if abs(self.zz_gt[idx] - self.zz_pred[idx]) > 0.001: + axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3) + + angle = get_angle(self.xx_pred[idx], self.zz_pred[idx]) + ellipse_ale = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale[idx] * 2, + height=1, angle=angle, color='b', fill=False, label="Aleatoric Uncertainty", + linewidth=1.3) + ellipse_var = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_epi[idx] * 2, + height=1, angle=angle, color='r', fill=False, label="Uncertainty", + linewidth=1, linestyle='--') + + axes[1].add_patch(ellipse_ale) + if self.epistemic: + axes[1].add_patch(ellipse_var) + + axes[1].plot(self.xx_pred[idx], self.zz_pred[idx], 'ro', label="Predicted", markersize=3) + + def draw_boxes(self, axes, idx, color): + ww_box = self.boxes[idx][2] - self.boxes[idx][0] + hh_box = (self.boxes[idx][3] - self.boxes[idx][1]) * self.y_scale + ww_box_gt = self.boxes_gt[idx][2] - self.boxes_gt[idx][0] + hh_box_gt = (self.boxes_gt[idx][3] - self.boxes_gt[idx][1]) * self.y_scale + + rectangle = Rectangle((self.boxes[idx][0], self.boxes[idx][1] * self.y_scale), + width=ww_box, height=hh_box, fill=False, color=color, linewidth=3) + rectangle_gt = Rectangle((self.boxes_gt[idx][0], self.boxes_gt[idx][1] * self.y_scale), + width=ww_box_gt, height=hh_box_gt, fill=False, color='g', linewidth=2) + axes[0].add_patch(rectangle_gt) + axes[0].add_patch(rectangle) + + def draw_text_front(self, axes, uv, num): + axes[0].text(uv[0] + self.radius, uv[1] * self.y_scale - self.radius, str(num), + fontsize=self.FONTSIZE, color=self.TEXTCOLOR, weight='bold') + + def draw_text_bird(self, axes, idx, num): + """Plot the number in the bird eye view map""" + + std = self.stds_epi[idx] if self.stds_epi[idx] > 0 else self.stds_ale[idx] + theta = math.atan2(self.zz_pred[idx], self.xx_pred[idx]) + + delta_x = std * math.cos(theta) + delta_z = std * math.sin(theta) + + axes[1].text(self.xx_pred[idx] + delta_x, self.zz_pred[idx] + delta_z, + str(num), fontsize=self.FONTSIZE_BV, color='darkorange') + + def draw_circle(self, axes, uv, color): + + circle = Circle((uv[0], uv[1] * self.y_scale), radius=self.radius, color=color, fill=True) + axes[0].add_patch(circle) + + def set_axes(self, ax, axis): + assert axis in (0, 1) + + if axis == 0: + ax.set_axis_off() + ax.set_xlim(0, self.width) + ax.set_ylim(self.height, 0) + self.mpl_im0 = ax.imshow(self.im) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + + else: + uv_max = [0., float(self.height)] + xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max) + x_max = abs(xyz_max[0]) # shortcut to avoid oval circles in case of different kk + ax.plot([0, x_max], [0, self.z_max], 'k--') + ax.plot([0, -x_max], [0, self.z_max], 'k--') + ax.set_ylim(0, self.z_max+1) + ax.set_xlabel("X [m]") + ax.set_ylabel("Z [m]") + + return ax + + +def draw_legend(axes): + handles, labels = axes[1].get_legend_handles_labels() + by_label = OrderedDict(zip(labels, handles)) + axes[1].legend(by_label.values(), by_label.keys()) + + +def get_angle(xx, zz): + """Obtain the points to plot the confidence of each annotation""" + + theta = math.atan2(zz, xx) + angle = theta * (180 / 
math.pi) + + return angle diff --git a/src/visuals/results.py b/monoloco/visuals/results.py similarity index 95% rename from src/visuals/results.py rename to monoloco/visuals/results.py index 0e618ef..9d1d481 100644 --- a/src/visuals/results.py +++ b/monoloco/visuals/results.py @@ -1,3 +1,4 @@ +# pylint: disable=R0915 import os import numpy as np @@ -5,7 +6,7 @@ import matplotlib.pyplot as plt from matplotlib.patches import Ellipse -def print_results(dic_stats, show=False, save=False): +def print_results(dic_stats, show=False): """ Visualize error as function of the distance on the test set and compare it with target errors based on human @@ -67,7 +68,7 @@ def print_results(dic_stats, show=False, save=False): xxs = get_distances(clusters) yys = target_error(np.array(xxs), mm_gender) ax[1].plot(xxs, bbs, marker='s', color='b', label="Spread b") - ax[1].plot(xxs, yys, '--', color='lightgreen', label="Task error", linewidth=2.5) + ax[1].plot(xxs, yys, '--', color='lightgreen', label="Task error", linewidth=2.5) yys_up = [rec_c + ar/2 * scale * yy for yy in yys] bbs_up = [rec_c + ar/2 * scale * bb for bb in bbs] yys_down = [rec_c - ar/2 * scale * yy for yy in yys] @@ -81,7 +82,7 @@ def print_results(dic_stats, show=False, save=False): for idx, xx in enumerate(xxs): te = Ellipse((xx, rec_c), width=yys[idx]*ar*scale, height=scale, angle=90, color='lightgreen', fill=True) - bi = Ellipse((xx, rec_c), width=bbs[idx]*ar*scale, height=scale, angle=90, color='b',linewidth=1.8, + bi = Ellipse((xx, rec_c), width=bbs[idx]*ar*scale, height=scale, angle=90, color='b', linewidth=1.8, fill=False) ax[0].add_patch(te) diff --git a/src/visuals/webcam.py b/monoloco/visuals/webcam.py similarity index 93% rename from src/visuals/webcam.py rename to monoloco/visuals/webcam.py index 1f65dd3..af42991 100644 --- a/src/visuals/webcam.py +++ b/monoloco/visuals/webcam.py @@ -1,3 +1,4 @@ +# pylint: disable=W0212 """ Webcam demo application @@ -14,11 +15,11 @@ from openpifpaf import transforms import cv2 -from visuals.printer import Printer -from utils.pifpaf import preprocess_pif -from predict.pifpaf import PifPaf -from predict.monoloco import MonoLoco -from predict.factory import factory_for_gt +from ..visuals.printer import Printer +from ..utils.pifpaf import preprocess_pif +from ..predict.pifpaf import PifPaf +from ..predict.network import MonoLoco +from ..predict.factory import factory_for_gt def webcam(args): @@ -107,7 +108,7 @@ class VisualizerMonoloco: del axes[1].patches[0] # the one became the 0 if len(axes[1].lines) > 2: del axes[1].lines[2] - if len(axes[1].texts) > 0: # in case of no text + if axes[1].texts: # in case of no text del axes[1].texts[0] printer.draw(figures, axes, dict_ann, image) mypause(0.01) diff --git a/src/eval/generate_kitti.py b/src/eval/generate_kitti.py deleted file mode 100644 index f77702b..0000000 --- a/src/eval/generate_kitti.py +++ /dev/null @@ -1,153 +0,0 @@ -"""Run monoloco over all the pifpaf joints of KITTI images -and extract and save the annotations in txt files""" - - -import math -import os -import glob -import json -import shutil -import itertools - -import numpy as np -import torch - -from predict.monoloco import MonoLoco -from eval.geom_baseline import compute_distance -from utils.kitti import get_calibration -from utils.pifpaf import preprocess_pif -from utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera - - -def generate_kitti(model, dir_ann, p_dropout=0.2, n_dropout=0): - - cnt_ann = 0 - cnt_file = 0 - cnt_no_file = 0 - - dir_kk = os.path.join('data', 
'kitti', 'calib') - dir_out = os.path.join('data', 'kitti', 'monoloco') - - # Remove the output directory if alreaady exists (avoid residual txt files) - if os.path.exists(dir_out): - shutil.rmtree(dir_out) - os.makedirs(dir_out) - print("Created empty output directory for txt files") - - # Load monoloco - use_cuda = torch.cuda.is_available() - device = torch.device("cuda" if use_cuda else "cpu") - monoloco = MonoLoco(model_path=model, device=device, n_dropout=n_dropout, p_dropout=p_dropout) - - # Run monoloco over the list of images - list_basename = factory_basename(dir_ann) - for basename in list_basename: - path_calib = os.path.join(dir_kk, basename + '.txt') - annotations, kk, tt = factory_file(path_calib, dir_ann, basename) - boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374)) - - if not keypoints: - cnt_no_file += 1 - continue - else: - # Run the network and the geometric baseline - outputs, varss = monoloco.forward(keypoints, kk) - dds_geom = eval_geometric(keypoints, kk, average_y=0.48) - - # Save the file - all_outputs = [outputs.detach().cpu(), varss.detach().cpu(), dds_geom] - all_inputs = [boxes, keypoints] - all_params = [kk, tt] - path_txt = os.path.join(dir_out, basename + '.txt') - save_txts(path_txt, all_inputs, all_outputs, all_params) - - # Update counting - cnt_ann += len(boxes) - cnt_file += 1 - - # Print statistics - print("Saved in {} txt {} annotations. Not found {} images" - .format(cnt_file, cnt_ann, cnt_no_file)) - - -def save_txts(path_txt, all_inputs, all_outputs, all_params): - - outputs, varss, dds_geom = all_outputs[:] - uv_boxes, keypoints = all_inputs[:] - kk, tt = all_params[:] - - uv_centers = get_keypoints(keypoints, mode='bottom') # Kitti uses the bottom center to calculate depth - xy_centers = pixel_to_camera(uv_centers, kk, 1) - zzs = xyz_from_distance(outputs[:, 0:1], xy_centers)[:, 2].tolist() - - with open(path_txt, "w+") as ff: - for idx in range(outputs.shape[0]): - - xx = float(xy_centers[idx][0]) * zzs[idx] + tt[0] - yy = float(xy_centers[idx][1]) * zzs[idx] + tt[1] - zz = zzs[idx] + tt[2] - dd = math.sqrt(xx ** 2 + yy ** 2 + zz ** 2) - cam_0 = [xx, yy, zz, dd] - - for el in uv_boxes[idx][:]: - ff.write("%s " % el) - for el in cam_0: - ff.write("%s " % el) - ff.write("%s " % float(outputs[idx][1])) - ff.write("%s " % float(varss[idx])) - ff.write("%s " % dds_geom[idx]) - ff.write("\n") - - # Save intrinsic matrix in the last row - for kk_el in itertools.chain(*kk): # Flatten a list of lists - ff.write("%f " % kk_el) - ff.write("\n") - - -def factory_basename(dir_ann): - """ Return all the basenames in the annotations folder""" - - list_ann = glob.glob(os.path.join(dir_ann, '*.json')) - list_basename = [os.path.basename(x).split('.')[0] for x in list_ann] - assert list_basename, " Missing json annotations file to create txt files for KITTI datasets" - return list_basename - - -def factory_file(path_calib, dir_ann, basename): - """Choose the annotation and the calibration files. 
Stereo option with ite = 1""" - - p_left, p_right = get_calibration(path_calib) - - kk, tt = p_left[:] - path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json') - - try: - with open(path_ann, 'r') as f: - annotations = json.load(f) - except FileNotFoundError: - annotations = None - - return annotations, kk, tt - - -def eval_geometric(keypoints, kk, average_y=0.48): - """ Evaluate geometric distance""" - - dds_geom = [] - - uv_centers = get_keypoints(keypoints, mode='center') - uv_shoulders = get_keypoints(keypoints, mode='shoulder') - uv_hips = get_keypoints(keypoints, mode='hip') - - xy_centers = pixel_to_camera(uv_centers, kk, 1) - xy_shoulders = pixel_to_camera(uv_shoulders, kk, 1) - xy_hips = pixel_to_camera(uv_hips, kk, 1) - - for idx, xy_center in enumerate(xy_centers): - zz = compute_distance(xy_shoulders[idx], xy_hips[idx], average_y) - xyz_center = np.array([xy_center[0], xy_center[1], zz]) - dd_geom = float(np.linalg.norm(xyz_center)) - dds_geom.append(dd_geom) - - return dds_geom - diff --git a/src/features/resize_images.py b/src/features/resize_images.py deleted file mode 100644 index 353b4c6..0000000 --- a/src/features/resize_images.py +++ /dev/null @@ -1,37 +0,0 @@ - -import glob -import logging -import os -import cv2 -import sys - - -def resize(input_glob, output_dir, factor=2): - """ - Resize images using multiplicative factor - """ - list_im = glob.glob(input_glob) - - - for idx, path_in in enumerate(list_im): - - basename, _ = os.path.splitext(os.path.basename(path_in)) - im = cv2.imread(path_in) - assert im is not None, "Image not found" - - # Paddle the image if requested and resized the dataset to a fixed dataset - h_im = im.shape[0] - w_im = im.shape[1] - w_new = round(factor * w_im) - h_new = round(factor * h_im) - - print("resizing image {} to: {} x {}".format(basename, w_new, h_new)) - im_new = cv2.resize(im, (w_new, h_new)) - - # Save the image - name_im = basename + '.png' - path_out = os.path.join(output_dir, name_im) - cv2.imwrite(path_out, im_new) - sys.stdout.write('\r' + 'Saving image number: {}'.format(idx) + '\t') - - diff --git a/src/utils/pifpaf.py b/src/utils/pifpaf.py deleted file mode 100644 index 5e0d166..0000000 --- a/src/utils/pifpaf.py +++ /dev/null @@ -1,57 +0,0 @@ - -import numpy as np - - -def preprocess_pif(annotations, im_size=None): - """ - Preprocess pif annotations: - 1. enlarge the box of 10% - 2. 
Constraint it inside the image (if image_size provided) - """ - - boxes = [] - keypoints = [] - - for dic in annotations: - box = dic['bbox'] - if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0) - return [], [] - - else: - kps = prepare_pif_kps(dic['keypoints']) - conf = float(np.mean(np.array(kps[2]))) - - # Add 10% for y and 20% for x - delta_h = (box[3] - box[1]) / 10 - delta_w = (box[2] - box[0]) / 5 - assert delta_h > -5 and delta_w > -5, "Bounding box <=0" - box[0] -= delta_w - box[1] -= delta_h - box[2] += delta_w - box[3] += delta_h - - # Put the box inside the image - if im_size is not None: - box[0] = max(0, box[0]) - box[1] = max(0, box[1]) - box[2] = min(box[2], im_size[0]) - box[3] = min(box[3], im_size[1]) - - box.append(conf) - boxes.append(box) - keypoints.append(kps) - - return boxes, keypoints - - -def prepare_pif_kps(kps_in): - """Convert from a list of 51 to a list of 3, 17""" - - assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3" - xxs = kps_in[0:][::3] - yys = kps_in[1:][::3] # from offset 1 every 3 - ccs = kps_in[2:][::3] - - return [xxs, yys, ccs] - - diff --git a/src/visuals/printer.py b/src/visuals/printer.py deleted file mode 100644 index 03fe3ae..0000000 --- a/src/visuals/printer.py +++ /dev/null @@ -1,243 +0,0 @@ - -import math -from collections import OrderedDict -import numpy as np - -import matplotlib -import matplotlib.pyplot as plt -import matplotlib.cm as cm -from matplotlib.patches import Ellipse, Circle -from mpl_toolkits.axes_grid1 import make_axes_locatable - -from utils.camera import pixel_to_camera -from utils.misc import get_task_error - - -class Printer: - """ - Print results on images: birds eye view and computed distance - """ - RADIUS_KPS = 6 - FONTSIZE_BV = 16 - FONTSIZE = 18 - TEXTCOLOR = 'darkorange' - COLOR_KPS = 'yellow' - - def __init__(self, image, output_path, kk, output_types, text=True, legend=True, epistemic=False, - z_max=30, fig_width=10): - - self.im = image - self.kk = kk - self.output_types = output_types - self.text = text - self.epistemic = epistemic - self.legend = legend - self.z_max = z_max # To include ellipses in the image - self.y_scale = 1 - self.width = self.im.size[0] - self.height = self.im.size[1] - self.fig_width = fig_width - - # Define the output dir - self.path_out = output_path - self.cmap = cm.get_cmap('jet') - self.extensions = [] - self.mpl_im0 = None - - def _process_results(self, dic_ann): - # Include the vectors inside the interval given by z_max - self.stds_ale = dic_ann['stds_ale'] - self.stds_ale_epi = dic_ann['stds_epi'] - self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']] - self.zz_gt = [xx[2] if xx[2] < self.z_max - self.stds_ale_epi[idx] else 0 - for idx, xx in enumerate(dic_ann['xyz_real'])] - self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']] - self.zz_pred = [xx[2] if xx[2] < self.z_max - self.stds_ale_epi[idx] else 0 - for idx, xx in enumerate(dic_ann['xyz_pred'])] - self.dds_real = dic_ann['dds_real'] - self.uv_centers = dic_ann['uv_centers'] - self.uv_shoulders = dic_ann['uv_shoulders'] - self.uv_kps = dic_ann['uv_kps'] - - self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1]) - self.radius = 14 / 1600 * self.width - - def factory_axes(self): - """Create axes for figures: front bird combined""" - axes = [] - figures = [] - - # Initialize combined figure, resizing it for aesthetic proportions - if 'combined' in self.output_types: - assert 'bird' and 'front' not in self.output_types, \ - "combined figure cannot be print together with front or bird ones" - - 
self.y_scale = self.width / (self.height * 1.8) # Defined proportion - if self.y_scale < 0.95 or self.y_scale > 1.05: # allows more variation without resizing - self.im = self.im.resize((self.width, round(self.height * self.y_scale))) - self.width = self.im.size[0] - self.height = self.im.size[1] - fig_width = self.fig_width + 0.6 * self.fig_width - fig_height = self.fig_width * self.height / self.width - - # Distinguish between KITTI images and general images - if self.y_scale > 1.7: - fig_ar_1 = 1.7 - else: - fig_ar_1 = 1.3 - width_ratio = 1.9 - self.extensions.append('.combined.png') - - fig, (ax1, ax0) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [1, width_ratio]}, - figsize=(fig_width, fig_height)) - ax1.set_aspect(fig_ar_1) - fig.set_tight_layout(True) - fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02) - - figures.append(fig) - assert 'front' not in self.output_types and 'bird' not in self.output_types, \ - "--combined arguments is not supported with other visualizations" - - # Initialize front figure - elif 'front' in self.output_types: - width = self.fig_width - height = self.fig_width * self.height / self.width - self.extensions.append(".front.png") - plt.figure(0) - fig0, ax0 = plt.subplots(1, 1, figsize=(width, height)) - fig0.set_tight_layout(True) - - figures.append(fig0) - - # Create front figure axis - if any(xx in self.output_types for xx in ['front', 'combined']): - - ax0.set_axis_off() - ax0.set_xlim(0, self.width) - ax0.set_ylim(self.height, 0) - self.mpl_im0 = ax0.imshow(self.im) - z_min = 0 - bar_ticks = self.z_max // 5 + 1 - ax0.get_xaxis().set_visible(False) - ax0.get_yaxis().set_visible(False) - - divider = make_axes_locatable(ax0) - cax = divider.append_axes('right', size='3%', pad=0.05) - - norm = matplotlib.colors.Normalize(vmin=z_min, vmax=self.z_max) - scalar_mappable = plt.cm.ScalarMappable(cmap=self.cmap, norm=norm) - scalar_mappable.set_array([]) - plt.colorbar(scalar_mappable, ticks=np.linspace(z_min, self.z_max, bar_ticks), - boundaries=np.arange(z_min - 0.05, self.z_max + 0.1, .1), cax=cax, label='Z [m]') - - axes.append(ax0) - if not axes: - axes.append(None) - - if 'bird' in self.output_types: - self.extensions.append(".bird.png") - fig1, ax1 = plt.subplots(1, 1) - fig1.set_tight_layout(True) - figures.append(fig1) - if any(xx in self.output_types for xx in ['bird', 'combined']): - uv_max = [0., float(self.height)] - xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max) - x_max = abs(xyz_max[0]) # shortcut to avoid oval circles in case of different kk - - # Adding field of view - ax1.plot([0, x_max], [0, self.z_max], 'k--') - ax1.plot([0, -x_max], [0, self.z_max], 'k--') - ax1.set_ylim(0, self.z_max+1) - ax1.set_xlabel("X [m]") - ax1.set_ylabel("Z [m]") - axes.append(ax1) - return figures, axes - - def draw(self, figures, axes, dic_out, image, save=False, show=False): - - self._process_results(dic_out) - num = 0 - if any(xx in self.output_types for xx in ['front', 'combined']): - self.mpl_im0.set_data(image) - for idx, uv in enumerate(self.uv_shoulders): - - if min(self.zz_pred[idx], self.zz_gt[idx]) > 0: - color = self.cmap((self.zz_pred[idx] % self.z_max) / self.z_max) - circle = Circle((uv[0], uv[1] * self.y_scale), radius=self.radius, color=color, fill=True) - axes[0].add_patch(circle) - - if self.text: - axes[0].text(uv[0]+self.radius, uv[1] * self.y_scale - self.radius, str(num), - fontsize=self.FONTSIZE, color=self.TEXTCOLOR, weight='bold') - num += 1 - if any(xx in self.output_types for xx in 
['bird', 'combined']): - for idx, _ in enumerate(self.xx_gt): - if self.zz_gt[idx] > 0: - target = get_task_error(self.dds_real[idx]) - - angle = get_angle(self.xx_gt[idx], self.zz_gt[idx]) - ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1, - angle=angle, color='lightgreen', fill=True, label="Task error") - axes[1].add_patch(ellipse_real) - if abs(self.zz_gt[idx] - self.zz_pred[idx]) > 0.001: - axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3) - - # Print prediction and the real ground truth. - num = 0 - for idx, _ in enumerate(self.xx_pred): - if self.zz_gt[idx] > 0: # only the merging ones and inside the interval - - angle = get_angle(self.xx_pred[idx], self.zz_pred[idx]) - ellipse_ale = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale[idx] * 2, - height=1, angle=angle, color='b', fill=False, label="Aleatoric Uncertainty", - linewidth=1.3) - ellipse_var = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale_epi[idx] * 2, - height=1, angle=angle, color='r', fill=False, label="Uncertainty", - linewidth=1, linestyle='--') - - axes[1].add_patch(ellipse_ale) - if self.epistemic: - axes[1].add_patch(ellipse_var) - - axes[1].plot(self.xx_pred[idx], self.zz_pred[idx], 'ro', label="Predicted", markersize=3) - - # Setup the legend to avoid repetitions - if self.legend: - handles, labels = axes[1].get_legend_handles_labels() - by_label = OrderedDict(zip(labels, handles)) - axes[1].legend(by_label.values(), by_label.keys()) - - # Plot the number - (_, x_pos), (_, z_pos) = get_confidence(self.xx_pred[idx], self.zz_pred[idx], - self.stds_ale_epi[idx]) - - if self.text: - axes[1].text(x_pos, z_pos, str(num), fontsize=self.FONTSIZE_BV, color='darkorange') - num += 1 - - for idx, fig in enumerate(figures): - fig.canvas.draw() - if save: - fig.savefig(self.path_out + self.extensions[idx], bbox_inches='tight') - if show: - fig.show() - - -def get_confidence(xx, zz, std): - """Obtain the points to plot the confidence of each annotation""" - - theta = math.atan2(zz, xx) - - delta_x = std * math.cos(theta) - delta_z = std * math.sin(theta) - - return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z) - - -def get_angle(xx, zz): - """Obtain the points to plot the confidence of each annotation""" - - theta = math.atan2(zz, xx) - angle = theta * (180 / math.pi) - - return angle diff --git a/tests/test_utils.py b/tests/test_utils.py index 6a4f58d..052eb7e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,10 +1,12 @@ +import os +import sys - -from utils.iou import get_iou_matrix -from utils.camera import pixel_to_camera +# Python does not consider the current directory to be a package +sys.path.insert(0, os.path.join('..', 'monoloco')) def test_iou(): + from monoloco.utils.iou import get_iou_matrix boxes_pred = [[1, 100, 1, 200]] boxes_gt = [[100., 120., 150., 160.],[12, 110, 130., 160.]] iou_matrix = get_iou_matrix(boxes_pred, boxes_gt) @@ -12,6 +14,7 @@ def test_iou(): def test_pixel_to_camera(): + from monoloco.utils.camera import pixel_to_camera kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] zz = 10 uv_vector = [1000., 400.]
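A closing note on the stereo utilities added above: `depth_from_disparity`, `get_pixel_error` and the expected disparities all instantiate the pinhole stereo relation z = f * B / disparity with the KITTI stereo baseline B = 0.54 m and an approximate focal length f = 721 px. The sketch below (the helper name is illustrative, not part of the package) shows why a single pixel of disparity mismatch translates into a depth error that grows roughly quadratically with depth, which is exactly what `get_pixel_error` models as a function of depth:

```
def depth_from_disp(disparity_px, focal_px=721., baseline_m=0.54):
    """Pinhole stereo relation: z = f * B / disparity."""
    return focal_px * baseline_m / disparity_px

for zz in (5., 10., 20., 30.):
    disp = 721. * 0.54 / zz                     # expected disparity at depth zz
    err = abs(zz - depth_from_disp(disp + 1))   # effect of a 1-pixel mismatch
    print("z = {:4.1f} m  disparity = {:6.1f} px  1-px depth error = {:.2f} m".format(zz, disp, err))
```

At 5 m the error is about 6 cm, while at 30 m it exceeds 2 m, so the stereo refinement is most reliable in the near range.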