diff --git a/.travis.yml b/.travis.yml index 45a5143..00cd910 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,10 +4,8 @@ python: - "3.6" - "3.7" install: - - pip install openpifpaf - - pip install nuscenes-devkit - - pip install tabulate - - pip install pylint + - pip install --upgrade pip setuptools + - pip install ".[test]" script: - pylint monoloco --disable=unused-variable,fixme - pytest -vv diff --git a/README.md b/README.md index 359471f..7e275b7 100644 --- a/README.md +++ b/README.md @@ -29,14 +29,14 @@ A video with qualitative results is available on [YouTube](https://www.youtube.c Python 3 is required. Python 2 is not supported. Do not clone this repository and make sure there is no folder named monoloco in your current directory. -`pip install monoloco` +`pip3 install monoloco` Live demo is available, we recommend to try our **Webcam** functionality. More info in the webcam section. For development of the monoloco source code itself, you need to clone this repository and then: ``` -pip install openpifpaf nuscenes-devkit tabulate +pip3 install -e '.[test, prep]' ``` Python 3.6 or 3.7 is required for nuScenes development kit. Python 3 is required for openpifpaf. All details for Pifpaf pose detector at [openpifpaf](https://github.com/vita-epfl/openpifpaf). @@ -135,6 +135,7 @@ Multiple visualizations can be combined in different windows. The above gif has been obtained running on a Macbook the command: +`pip3 install opencv-python` `python3 -m monoloco.run predict --webcam --scale 0.2 --output_types combined --z_max 10 --checkpoint resnet50` # Preprocess @@ -152,6 +153,8 @@ data/kitti/images` Download nuScenes dataset from [nuScenes](https://www.nuscenes.org/download) (either Mini or TrainVal), save it anywhere and soft link it in `data/nuscenes` +nuScenes preprocessing requires `pip3 install nuscenes-devkit` + ### Annotations to preprocess MonoLoco is trained using 2D human pose joints. To create them run pifaf over KITTI or nuScenes training images. 
diff --git a/monoloco/__init__.py b/monoloco/__init__.py index 43649df..ccbedfd 100644 --- a/monoloco/__init__.py +++ b/monoloco/__init__.py @@ -1,4 +1,4 @@ """Open implementation of MonoLoco.""" -__version__ = '0.4.1' +__version__ = '0.4.2' diff --git a/monoloco/eval/__init__.py b/monoloco/eval/__init__.py index e69de29..43f3b84 100644 --- a/monoloco/eval/__init__.py +++ b/monoloco/eval/__init__.py @@ -0,0 +1,4 @@ + +from .eval_kitti import EvalKitti +from .generate_kitti import GenerateKitti +from .geom_baseline import geometric_baseline diff --git a/monoloco/eval/eval_kitti.py b/monoloco/eval/eval_kitti.py index 65a13ab..bc887cb 100644 --- a/monoloco/eval/eval_kitti.py +++ b/monoloco/eval/eval_kitti.py @@ -13,10 +13,9 @@ from itertools import chain from tabulate import tabulate -from ..utils.iou import get_iou_matches -from ..utils.misc import get_task_error, get_pixel_error -from ..utils.kitti import check_conditions, get_category, split_training, parse_ground_truth -from ..visuals.results import print_results +from ..utils import get_iou_matches, get_task_error, get_pixel_error, check_conditions, get_category, split_training, \ + parse_ground_truth +from ..visuals import print_results class EvalKitti: diff --git a/monoloco/eval/generate_kitti.py b/monoloco/eval/generate_kitti.py index dd3bcb2..a9e20d0 100644 --- a/monoloco/eval/generate_kitti.py +++ b/monoloco/eval/generate_kitti.py @@ -13,12 +13,10 @@ import copy import numpy as np import torch -from ..predict.network import MonoLoco +from ..network import MonoLoco +from ..network.process import preprocess_pifpaf from ..eval.geom_baseline import compute_distance -from ..utils.kitti import get_calibration -from ..utils.pifpaf import preprocess_pif -from ..utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera -from ..utils.stereo import depth_from_disparity +from ..utils import get_keypoints, pixel_to_camera, xyz_from_distance, get_calibration, depth_from_disparity class GenerateKitti: @@ -51,7 
+49,7 @@ class GenerateKitti: for basename in self.list_basename: path_calib = os.path.join(self.dir_kk, basename + '.txt') annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename) - boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374)) + boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374)) if not keypoints: cnt_no_file += 1 @@ -95,7 +93,7 @@ class GenerateKitti: for mode in ['left', 'right']: annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename, mode=mode) - boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374)) + boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374)) if not keypoints and mode == 'left': cnt_no_file += 1 diff --git a/monoloco/eval/geom_baseline.py b/monoloco/eval/geom_baseline.py index 8d45ce4..bd2d659 100644 --- a/monoloco/eval/geom_baseline.py +++ b/monoloco/eval/geom_baseline.py @@ -6,7 +6,7 @@ from collections import defaultdict import numpy as np -from ..utils.camera import pixel_to_camera, get_keypoints +from ..utils import pixel_to_camera, get_keypoints AVERAGE_Y = 0.48 CLUSTERS = ['10', '20', '30', 'all'] diff --git a/monoloco/network/__init__.py b/monoloco/network/__init__.py new file mode 100644 index 0000000..a3f1221 --- /dev/null +++ b/monoloco/network/__init__.py @@ -0,0 +1,4 @@ + +from .pifpaf import PifPaf, ImageList +from .losses import LaplacianLoss +from .net import MonoLoco diff --git a/monoloco/train/architectures.py b/monoloco/network/architectures.py similarity index 100% rename from monoloco/train/architectures.py rename to monoloco/network/architectures.py diff --git a/monoloco/train/losses.py b/monoloco/network/losses.py similarity index 99% rename from monoloco/train/losses.py rename to monoloco/network/losses.py index 83d896c..86382e5 100644 --- a/monoloco/train/losses.py +++ b/monoloco/network/losses.py @@ -1,3 +1,4 @@ + import math import torch import numpy as np diff --git a/monoloco/predict/network.py b/monoloco/network/net.py 
similarity index 91% rename from monoloco/predict/network.py rename to monoloco/network/net.py index b2efd97..ee9e971 100644 --- a/monoloco/predict/network.py +++ b/monoloco/network/net.py @@ -1,6 +1,6 @@ """ -Monoloco predictor. It receives pifpaf joints and outputs distances +Monoloco class. From 2D joints to real-world distances """ import logging @@ -8,10 +8,9 @@ from collections import defaultdict import torch -from ..utils.iou import get_iou_matches, reorder_matches -from ..utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance -from ..utils.network import get_monoloco_inputs, unnormalize_bi, laplace_sampling -from ..train.architectures import LinearModel +from ..utils import get_iou_matches, reorder_matches, get_keypoints, pixel_to_camera, xyz_from_distance +from .process import preprocess_monoloco, unnormalize_bi, laplace_sampling +from .architectures import LinearModel class MonoLoco: @@ -43,7 +42,7 @@ class MonoLoco: return None, None with torch.no_grad(): - inputs = get_monoloco_inputs(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device)) + inputs = preprocess_monoloco(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device)) if self.n_dropout > 0: self.model.dropout.training = True # Manually reactivate dropout in eval total_outputs = torch.empty((0, inputs.size()[0])).to(self.device) diff --git a/monoloco/predict/pifpaf.py b/monoloco/network/pifpaf.py similarity index 100% rename from monoloco/predict/pifpaf.py rename to monoloco/network/pifpaf.py diff --git a/monoloco/network/process.py b/monoloco/network/process.py new file mode 100644 index 0000000..8eb387f --- /dev/null +++ b/monoloco/network/process.py @@ -0,0 +1,154 @@ + +import json + +import numpy as np +import torch + +from ..utils import get_keypoints, pixel_to_camera + + +def preprocess_monoloco(keypoints, kk): + + """ Preprocess batches of inputs + keypoints = torch tensors of (m, 3, 17) or list [3,17] + Outputs = torch tensors of (m, 34) 
in meters normalized (z=1) and zero-centered using the center of the box + """ + if isinstance(keypoints, list): + keypoints = torch.tensor(keypoints) + if isinstance(kk, list): + kk = torch.tensor(kk) + # Projection in normalized image coordinates and zero-center with the center of the bounding box + uv_center = get_keypoints(keypoints, mode='center') + xy1_center = pixel_to_camera(uv_center, kk, 10) + xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10) + # xy1_center[:, 1].fill_(0) #TODO + kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3) + kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view + return kps_out + + +def factory_for_gt(im_size, name=None, path_gt=None): + """Look for ground-truth annotations file and define calibration matrix based on image size """ + + try: + with open(path_gt, 'r') as f: + dic_names = json.load(f) + print('-' * 120 + "\nGround-truth file opened") + except (FileNotFoundError, TypeError): + print('-' * 120 + "\nGround-truth file not found") + dic_names = {} + + try: + kk = dic_names[name]['K'] + dic_gt = dic_names[name] + print("Matched ground-truth file!") + except KeyError: + dic_gt = None + x_factor = im_size[0] / 1600 + y_factor = im_size[1] / 900 + pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it + if im_size[0] / im_size[1] > 2.5: + kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration + else: + kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor], + [0, 1266.4 * pixel_factor, 491.5 * y_factor], + [0., 0., 1.]] # nuScenes calibration + + print("Using a standard calibration matrix...") + + return kk, dic_gt + + +def laplace_sampling(outputs, n_samples): + + # np.random.seed(1) + mu = outputs[:, 0] + bi = torch.abs(outputs[:, 1]) + + # Analytical + # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0]) + # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu)) + + # Sampling + cuda_check = outputs.is_cuda + 
if cuda_check: + get_device = outputs.get_device() + device = torch.device(type="cuda", index=get_device) + else: + device = torch.device("cpu") + + laplace = torch.distributions.Laplace(mu, bi) + xx = laplace.sample((n_samples,)).to(device) + + return xx + + +def epistemic_variance(total_outputs): + """Compute epistemic variance""" + + # var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2 + var_y = np.var(total_outputs, axis=0) + lower_b = np.quantile(a=total_outputs, q=0.25, axis=0) + upper_b = np.quantile(a=total_outputs, q=0.75, axis=0) + var_new = (upper_b - lower_b) + + return var_y, var_new + + +def unnormalize_bi(outputs): + """Unnormalize relative bi of a torch tensor""" + + outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0] + return outputs + + +def preprocess_pifpaf(annotations, im_size=None): + """ + Preprocess pif annotations: + 1. enlarge the box (1/7 of its extent in y, 1/3.5 in x, on each side) + 2. Constrain it inside the image (if im_size provided) + """ + + boxes = [] + keypoints = [] + + for dic in annotations: + box = dic['bbox'] + if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0) + return [], [] + + kps = prepare_pif_kps(dic['keypoints']) + conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints + + # Add 1/7 (~14%) in y and 1/3.5 (~29%) in x on each side + delta_h = (box[3] - box[1]) / 7 + delta_w = (box[2] - box[0]) / 3.5 + assert delta_h > -5 and delta_w > -5, "Bounding box <=0" + box[0] -= delta_w + box[1] -= delta_h + box[2] += delta_w + box[3] += delta_h + + # Put the box inside the image + if im_size is not None: + box[0] = max(0, box[0]) + box[1] = max(0, box[1]) + box[2] = min(box[2], im_size[0]) + box[3] = min(box[3], im_size[1]) + + box.append(conf) + boxes.append(box) + keypoints.append(kps) + + return boxes, keypoints + + +def prepare_pif_kps(kps_in): + """Convert from a list of 51 to a list of 3, 17""" + + assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3" + xxs =
kps_in[0:][::3] + yys = kps_in[1:][::3] # from offset 1 every 3 + ccs = kps_in[2:][::3] + + return [xxs, yys, ccs] diff --git a/monoloco/predict.py b/monoloco/predict.py new file mode 100644 index 0000000..528f963 --- /dev/null +++ b/monoloco/predict.py @@ -0,0 +1,123 @@ + +import os +import json + +import torch +from PIL import Image +from openpifpaf import show + +from .visuals.printer import Printer +from .network import PifPaf, ImageList, MonoLoco +from .network.process import factory_for_gt, preprocess_pifpaf + + +def predict(args): + + cnt = 0 + + # load pifpaf and monoloco models + pifpaf = PifPaf(args) + monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout) + + # data + data = ImageList(args.images, scale=args.scale) + data_loader = torch.utils.data.DataLoader( + data, batch_size=1, shuffle=False, + pin_memory=args.pin_memory, num_workers=args.loader_workers) + + for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader): + images = image_tensors.permute(0, 2, 3, 1) + + processed_images = processed_images_cpu.to(args.device, non_blocking=True) + fields_batch = pifpaf.fields(processed_images) + + # unbatch + for image_path, image, processed_image_cpu, fields in zip( + image_paths, images, processed_images_cpu, fields_batch): + + if args.output_directory is None: + output_path = image_path + else: + file_name = os.path.basename(image_path) + output_path = os.path.join(args.output_directory, file_name) + print('image', idx, image_path, output_path) + + keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields) + pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing + images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image + + if 'monoloco' in args.networks: + im_size = (float(image.size()[1] / args.scale), + float(image.size()[0] / args.scale)) # 
Width, Height (original) + + # Extract calibration matrix and ground truth file if present + with open(image_path, 'rb') as f: + pil_image = Image.open(f).convert('RGB') + images_outputs.append(pil_image) + + im_name = os.path.basename(image_path) + + kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt) + + # Preprocess pifpaf outputs and run monoloco + boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size) + outputs, varss = monoloco.forward(keypoints, kk) + dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt) + + else: + dic_out = None + kk = None + + factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk) + print('Image {}\n'.format(cnt) + '-' * 120) + cnt += 1 + + +def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None): + """Output json files or images according to the choice""" + + # Save json file + if 'pifpaf' in args.networks: + keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:] + + # Visualizer + keypoint_painter = show.KeypointPainter(show_box=False) + skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True, + markersize=1, linewidth=4) + + if 'json' in args.output_types and keypoint_sets.size > 0: + with open(output_path + '.pifpaf.json', 'w') as f: + json.dump(pifpaf_out, f) + + if 'keypoints' in args.output_types: + with show.image_canvas(images_outputs[0], + output_path + '.keypoints.png', + show=args.show, + fig_width=args.figure_width, + dpi_factor=args.dpi_factor) as ax: + keypoint_painter.keypoints(ax, keypoint_sets) + + if 'skeleton' in args.output_types: + with show.image_canvas(images_outputs[0], + output_path + '.skeleton.png', + show=args.show, + fig_width=args.figure_width, + dpi_factor=args.dpi_factor) as ax: + skeleton_painter.keypoints(ax, keypoint_sets, scores=scores) + + if 'monoloco' in args.networks: + if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])): + epistemic = 
False + if args.n_dropout > 0: + epistemic = True + + if dic_out['boxes']: # Only print in case of detections + printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types + , z_max=args.z_max, epistemic=epistemic) + figures, axes = printer.factory_axes() + printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box, + save=True, show=args.show) + + if 'json' in args.output_types: + with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff: + json.dump(dic_out, ff) diff --git a/monoloco/predict/__init__.py b/monoloco/predict/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/monoloco/predict/factory.py b/monoloco/predict/factory.py deleted file mode 100644 index 2f259d7..0000000 --- a/monoloco/predict/factory.py +++ /dev/null @@ -1,87 +0,0 @@ - -import json -import os -from openpifpaf import show -from ..visuals.printer import Printer - - -def factory_for_gt(im_size, name=None, path_gt=None): - """Look for ground-truth annotations file and define calibration matrix based on image size """ - - try: - with open(path_gt, 'r') as f: - dic_names = json.load(f) - print('-' * 120 + "\nGround-truth file opened") - except (FileNotFoundError, TypeError): - print('-' * 120 + "\nGround-truth file not found") - dic_names = {} - - try: - kk = dic_names[name]['K'] - dic_gt = dic_names[name] - print("Matched ground-truth file!") - except KeyError: - dic_gt = None - x_factor = im_size[0] / 1600 - y_factor = im_size[1] / 900 - pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it - if im_size[0] / im_size[1] > 2.5: - kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration - else: - kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor], - [0, 1266.4 * pixel_factor, 491.5 * y_factor], - [0., 0., 1.]] # nuScenes calibration - - print("Using a standard calibration matrix...") - - return kk, dic_gt - - -def factory_outputs(args, images_outputs, output_path, 
pifpaf_outputs, dic_out=None, kk=None): - """Output json files or images according to the choice""" - - # Save json file - if 'pifpaf' in args.networks: - keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:] - - # Visualizer - keypoint_painter = show.KeypointPainter(show_box=False) - skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True, - markersize=1, linewidth=4) - - if 'json' in args.output_types and keypoint_sets.size > 0: - with open(output_path + '.pifpaf.json', 'w') as f: - json.dump(pifpaf_out, f) - - if 'keypoints' in args.output_types: - with show.image_canvas(images_outputs[0], - output_path + '.keypoints.png', - show=args.show, - fig_width=args.figure_width, - dpi_factor=args.dpi_factor) as ax: - keypoint_painter.keypoints(ax, keypoint_sets) - - if 'skeleton' in args.output_types: - with show.image_canvas(images_outputs[0], - output_path + '.skeleton.png', - show=args.show, - fig_width=args.figure_width, - dpi_factor=args.dpi_factor) as ax: - skeleton_painter.keypoints(ax, keypoint_sets, scores=scores) - - if 'monoloco' in args.networks: - if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])): - epistemic = False - if args.n_dropout > 0: - epistemic = True - - if dic_out['boxes']: # Only print in case of detections - printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types - , z_max=args.z_max, epistemic=epistemic) - figures, axes = printer.factory_axes() - printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box, - save=True, show=args.show) - - if 'json' in args.output_types: - with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff: - json.dump(dic_out, ff) diff --git a/monoloco/predict/predict.py b/monoloco/predict/predict.py deleted file mode 100644 index c91b150..0000000 --- a/monoloco/predict/predict.py +++ /dev/null @@ -1,72 +0,0 @@ - -import os -from PIL import Image - -import torch - -from ..predict.pifpaf import PifPaf, ImageList -from 
..predict.network import MonoLoco -from ..predict.factory import factory_for_gt, factory_outputs -from ..utils.pifpaf import preprocess_pif - - -def predict(args): - - cnt = 0 - - # load pifpaf and monoloco models - pifpaf = PifPaf(args) - monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout) - - # data - data = ImageList(args.images, scale=args.scale) - data_loader = torch.utils.data.DataLoader( - data, batch_size=1, shuffle=False, - pin_memory=args.pin_memory, num_workers=args.loader_workers) - - for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader): - images = image_tensors.permute(0, 2, 3, 1) - - processed_images = processed_images_cpu.to(args.device, non_blocking=True) - fields_batch = pifpaf.fields(processed_images) - - # unbatch - for image_path, image, processed_image_cpu, fields in zip( - image_paths, images, processed_images_cpu, fields_batch): - - if args.output_directory is None: - output_path = image_path - else: - file_name = os.path.basename(image_path) - output_path = os.path.join(args.output_directory, file_name) - print('image', idx, image_path, output_path) - - keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields) - pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing - images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image - - if 'monoloco' in args.networks: - im_size = (float(image.size()[1] / args.scale), - float(image.size()[0] / args.scale)) # Width, Height (original) - - # Extract calibration matrix and ground truth file if present - with open(image_path, 'rb') as f: - pil_image = Image.open(f).convert('RGB') - images_outputs.append(pil_image) - - im_name = os.path.basename(image_path) - - kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt) - - # Preprocess pifpaf outputs and run monoloco - boxes, 
keypoints = preprocess_pif(pifpaf_out, im_size) - outputs, varss = monoloco.forward(keypoints, kk) - dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt) - - else: - dic_out = None - kk = None - - factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk) - print('Image {}\n'.format(cnt) + '-' * 120) - cnt += 1 diff --git a/monoloco/prep/__init__.py b/monoloco/prep/__init__.py index e69de29..909b5f3 100644 --- a/monoloco/prep/__init__.py +++ b/monoloco/prep/__init__.py @@ -0,0 +1,3 @@ + +from .preprocess_nu import PreprocessNuscenes +from .preprocess_ki import PreprocessKitti diff --git a/monoloco/prep/preprocess_ki.py b/monoloco/prep/preprocess_ki.py index 10d2b69..eed1bf0 100644 --- a/monoloco/prep/preprocess_ki.py +++ b/monoloco/prep/preprocess_ki.py @@ -8,12 +8,9 @@ from collections import defaultdict import json import datetime -from ..prep.transforms import transform_keypoints -from ..utils.kitti import get_calibration, split_training, parse_ground_truth -from ..utils.network import get_monoloco_inputs -from ..utils.pifpaf import preprocess_pif -from ..utils.iou import get_iou_matches -from ..utils.misc import append_cluster +from .transforms import transform_keypoints +from ..utils import get_calibration, split_training, parse_ground_truth, get_iou_matches, append_cluster +from ..network.process import preprocess_pifpaf, preprocess_monoloco class PreprocessKitti: @@ -84,10 +81,10 @@ class PreprocessKitti: try: with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f: annotations = json.load(f) - boxes, keypoints = preprocess_pif(annotations, im_size=(1238, 374)) + boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1238, 374)) keypoints_hflip = transform_keypoints(keypoints, mode='flip') - inputs = get_monoloco_inputs(keypoints, kk).tolist() - inputs_hflip = get_monoloco_inputs(keypoints, kk).tolist() + inputs = preprocess_monoloco(keypoints, kk).tolist() + inputs_hflip = 
preprocess_monoloco(keypoints, kk).tolist() all_keypoints = [keypoints, keypoints_hflip] all_inputs = [inputs, inputs_hflip] diff --git a/monoloco/prep/preprocess_nu.py b/monoloco/prep/preprocess_nu.py index 9137d2a..e92e4e1 100644 --- a/monoloco/prep/preprocess_nu.py +++ b/monoloco/prep/preprocess_nu.py @@ -10,16 +10,11 @@ from collections import defaultdict import datetime import numpy as np - from nuscenes.nuscenes import NuScenes from nuscenes.utils import splits -from ..utils.iou import get_iou_matches -from ..utils.misc import append_cluster -from ..utils.nuscenes import select_categories -from ..utils.camera import project_3d -from ..utils.pifpaf import preprocess_pif -from ..utils.network import get_monoloco_inputs +from ..utils import get_iou_matches, append_cluster, select_categories, project_3d +from ..network.process import preprocess_pifpaf, preprocess_monoloco class PreprocessNuscenes: @@ -97,12 +92,12 @@ class PreprocessNuscenes: if exists: with open(path_pif, 'r') as file: annotations = json.load(file) - boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900)) + boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900)) else: continue if keypoints: - inputs = get_monoloco_inputs(keypoints, kk).tolist() + inputs = preprocess_monoloco(keypoints, kk).tolist() matches = get_iou_matches(boxes, boxes_gt, self.iou_min) for (idx, idx_gt) in matches: diff --git a/monoloco/run.py b/monoloco/run.py index 554d280..92f4534 100644 --- a/monoloco/run.py +++ b/monoloco/run.py @@ -1,20 +1,11 @@ # pylint: skip-file + import argparse from openpifpaf.network import nets from openpifpaf import decoder -from .prep.preprocess_nu import PreprocessNuscenes -from .prep.preprocess_ki import PreprocessKitti -from .predict.predict import predict -from .train.trainer import Trainer -from .eval.generate_kitti import GenerateKitti -from .eval.geom_baseline import geometric_baseline -from .train.hyp_tuning import HypTuning -from .eval.eval_kitti import 
EvalKitti -from .visuals.webcam import webcam - def cli(): @@ -105,28 +96,33 @@ def cli(): def main(): args = cli() - if args.command == 'predict': if args.webcam: + from .visuals.webcam import webcam webcam(args) else: + from .predict import predict predict(args) elif args.command == 'prep': if 'nuscenes' in args.dataset: + from .prep import PreprocessNuscenes prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset, args.iou_min) prep.run() if 'kitti' in args.dataset: + from .prep import PreprocessKitti prep = PreprocessKitti(args.dir_ann, args.iou_min) prep.run() elif args.command == 'train': + from .train import HypTuning if args.hyp: hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs, baseline=args.baseline, dropout=args.dropout, multiplier=args.multiplier, r_seed=args.r_seed) hyp_tuning.train() else: + from .train import Trainer training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs, baseline=args.baseline, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step, n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size, @@ -137,20 +133,24 @@ def main(): elif args.command == 'eval': if args.geometric: + from .eval import geometric_baseline geometric_baseline(args.joints) if args.generate: + from .eval import GenerateKitti kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout) kitti_txt.run_mono() if args.stereo: kitti_txt.run_stereo() if args.dataset == 'kitti': + from .eval import EvalKitti kitti_eval = EvalKitti(verbose=args.verbose, stereo=args.stereo) kitti_eval.run() kitti_eval.printer(show=args.show) if 'nuscenes' in args.dataset: + from .train import Trainer training = Trainer(joints=args.joints) _ = training.evaluate(load=True, model=args.model, debug=False) diff --git a/monoloco/train/__init__.py b/monoloco/train/__init__.py index e69de29..11f7d70 100644 --- a/monoloco/train/__init__.py +++ b/monoloco/train/__init__.py @@ -0,0 +1,3 @@ + 
+from .hyp_tuning import HypTuning +from .trainer import Trainer diff --git a/monoloco/train/trainer.py b/monoloco/train/trainer.py index 9b2b37f..b5eae5b 100644 --- a/monoloco/train/trainer.py +++ b/monoloco/train/trainer.py @@ -1,4 +1,4 @@ -# pylint: skip-file # TODO +# pylint: skip-file # TODO make train file and class trainer and """ Training and evaluation of a neural network which predicts 3D localization and confidence intervals @@ -19,11 +19,11 @@ import torch.nn as nn from torch.utils.data import DataLoader from torch.optim import lr_scheduler -from .datasets import KeypointsDataset -from .architectures import LinearModel -from .losses import LaplacianLoss -from ..utils.logs import set_logger -from ..utils.network import laplace_sampling, unnormalize_bi +from train.datasets import KeypointsDataset +from ..network import LaplacianLoss +from ..network.process import laplace_sampling, unnormalize_bi +from ..network.architectures import LinearModel +from ..utils import set_logger class Trainer: diff --git a/monoloco/utils/__init__.py b/monoloco/utils/__init__.py index e69de29..e894156 100644 --- a/monoloco/utils/__init__.py +++ b/monoloco/utils/__init__.py @@ -0,0 +1,8 @@ + +from .iou import get_iou_matches, reorder_matches, get_iou_matrix +from .misc import get_task_error, get_pixel_error, append_cluster +from .kitti import check_conditions, get_category, split_training, parse_ground_truth, get_calibration +from .camera import xyz_from_distance, get_keypoints, pixel_to_camera, project_3d +from .logs import set_logger +from .stereo import depth_from_disparity +from ..utils.nuscenes import select_categories diff --git a/monoloco/utils/misc.py b/monoloco/utils/misc.py index b2ffd4d..ecd8883 100644 --- a/monoloco/utils/misc.py +++ b/monoloco/utils/misc.py @@ -1,6 +1,4 @@ -import random - def append_cluster(dic_jo, phase, xx, dd, kps): """Append the annotation based on its distance""" @@ -27,20 +25,19 @@ def append_cluster(dic_jo, phase, xx, dd, kps): def 
get_task_error(dd, mode='std'): - """Get target error not knowing the gender""" + """Get target error not knowing the gender, modeled through a Gaussian Mixture model""" assert mode in ('std', 'mad') + h_mean = 171.5 # average h of the human distribution if mode == 'std': - mm_gender = 0.0557 - elif mode == 'mad': # mean absolute deviation - mm_gender = 0.0457 - return mm_gender * dd + delta_h = 9.07 # delta h for 63% confidence interval + elif mode == 'mad': + delta_h = 7.83 # delta_h of mean absolute deviation + return dd * (1 - h_mean / (h_mean + delta_h)) def get_pixel_error(dd_gt, zz_gt): - """calculate error in stereo distance due to +-1 pixel mismatch (function of depth)""" + """calculate error in stereo distance due to 1 pixel mismatch (function of depth)""" disp = 0.54 * 721 / zz_gt - random.seed(1) - sign = random.choice((-1, 1)) - delta_z = zz_gt - 0.54 * 721 / (disp + sign) + delta_z = zz_gt - 0.54 * 721 / (disp - 1) return dd_gt + delta_z diff --git a/monoloco/utils/network.py b/monoloco/utils/network.py deleted file mode 100644 index 14b6507..0000000 --- a/monoloco/utils/network.py +++ /dev/null @@ -1,67 +0,0 @@ - -import numpy as np -import torch -from ..utils.camera import get_keypoints, pixel_to_camera - - -def get_monoloco_inputs(keypoints, kk): - - """ Preprocess batches of inputs - keypoints = torch tensors of (m, 3, 17) or list [3,17] - Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box - """ - if isinstance(keypoints, list): - keypoints = torch.tensor(keypoints) - if isinstance(kk, list): - kk = torch.tensor(kk) - # Projection in normalized image coordinates and zero-center with the center of the bounding box - uv_center = get_keypoints(keypoints, mode='center') - xy1_center = pixel_to_camera(uv_center, kk, 10) - xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10) - # xy1_center[:, 1].fill_(0) #TODO - kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3) - kps_out =
kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view - return kps_out - - -def laplace_sampling(outputs, n_samples): - - # np.random.seed(1) - mu = outputs[:, 0] - bi = torch.abs(outputs[:, 1]) - - # Analytical - # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0]) - # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu)) - - # Sampling - cuda_check = outputs.is_cuda - if cuda_check: - get_device = outputs.get_device() - device = torch.device(type="cuda", index=get_device) - else: - device = torch.device("cpu") - - laplace = torch.distributions.Laplace(mu, bi) - xx = laplace.sample((n_samples,)).to(device) - - return xx - - -def epistemic_variance(total_outputs): - """Compute epistemic variance""" - - # var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2 - var_y = np.var(total_outputs, axis=0) - lower_b = np.quantile(a=total_outputs, q=0.25, axis=0) - upper_b = np.quantile(a=total_outputs, q=0.75, axis=0) - var_new = (upper_b - lower_b) - - return var_y, var_new - - -def unnormalize_bi(outputs): - """Unnormalize relative bi of a nunmpy array""" - - outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0] - return outputs diff --git a/monoloco/utils/nuscenes.py b/monoloco/utils/nuscenes.py index 90fe7b3..481b99c 100644 --- a/monoloco/utils/nuscenes.py +++ b/monoloco/utils/nuscenes.py @@ -2,6 +2,7 @@ import random import json import os + import numpy as np diff --git a/monoloco/utils/pifpaf.py b/monoloco/utils/pifpaf.py deleted file mode 100644 index 01e01b0..0000000 --- a/monoloco/utils/pifpaf.py +++ /dev/null @@ -1,54 +0,0 @@ - -import numpy as np - - -def preprocess_pif(annotations, im_size=None): - """ - Preprocess pif annotations: - 1. enlarge the box of 10% - 2. 
Constraint it inside the image (if image_size provided) - """ - - boxes = [] - keypoints = [] - - for dic in annotations: - box = dic['bbox'] - if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0) - return [], [] - - kps = prepare_pif_kps(dic['keypoints']) - conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints - - # Add 15% for y and 20% for x - delta_h = (box[3] - box[1]) / 7 - delta_w = (box[2] - box[0]) / 3.5 - assert delta_h > -5 and delta_w > -5, "Bounding box <=0" - box[0] -= delta_w - box[1] -= delta_h - box[2] += delta_w - box[3] += delta_h - - # Put the box inside the image - if im_size is not None: - box[0] = max(0, box[0]) - box[1] = max(0, box[1]) - box[2] = min(box[2], im_size[0]) - box[3] = min(box[3], im_size[1]) - - box.append(conf) - boxes.append(box) - keypoints.append(kps) - - return boxes, keypoints - - -def prepare_pif_kps(kps_in): - """Convert from a list of 51 to a list of 3, 17""" - - assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3" - xxs = kps_in[0:][::3] - yys = kps_in[1:][::3] # from offset 1 every 3 - ccs = kps_in[2:][::3] - - return [xxs, yys, ccs] diff --git a/monoloco/utils/stereo.py b/monoloco/utils/stereo.py index 2045289..409f981 100644 --- a/monoloco/utils/stereo.py +++ b/monoloco/utils/stereo.py @@ -49,7 +49,7 @@ def filter_disparities(kps, kps_right_list, idx, expected_disps): disparity_y_conf = np.where(mask_conf, disparity_y, np.nan) # Mask outliers using iqr - mask_outlier = get_iqr_mask(disparity_x_conf) + mask_outlier = interquartile_mask(disparity_x_conf) disparity_x_mask = np.where(mask_outlier, disparity_x_conf, np.nan) disparity_y_mask = np.where(mask_outlier, disparity_y_conf, np.nan) avg_disparity_x = np.nanmedian(disparity_x_mask, axis=1) # ignore the nan @@ -79,7 +79,7 @@ def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y): return False -def get_iqr_mask(distribution): +def interquartile_mask(distribution): quartile_1, 
quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1) iqr = quartile_3 - quartile_1 lower_bound = quartile_1 - (iqr * 1.5) diff --git a/monoloco/visuals/__init__.py b/monoloco/visuals/__init__.py index e69de29..2e637d3 100644 --- a/monoloco/visuals/__init__.py +++ b/monoloco/visuals/__init__.py @@ -0,0 +1,3 @@ + +from .printer import Printer +from .results import print_results diff --git a/monoloco/visuals/paper.py b/monoloco/visuals/paper.py index 1e1c17b..5ca63d6 100644 --- a/monoloco/visuals/paper.py +++ b/monoloco/visuals/paper.py @@ -1,11 +1,12 @@ # pylint: skip-file -import numpy as np -import os import math + +import numpy as np import matplotlib.pyplot as plt from matplotlib.patches import Ellipse -from visuals.printer import get_angle + +from .printer import get_angle def paper(): @@ -112,30 +113,24 @@ def paper(): plt.close() - def target_error(xx, mm): return mm * xx + def gmm(): mu_men = 178 std_men = 7 mu_women = 165 std_women = 7 - N_men_1 = np.random.normal(mu_men, std_men, 1000000) - N_men_2 = np.random.normal(mu_men, std_men, 1000000) - N_women_1 = np.random.normal(mu_women, std_women, 1000000) - N_women_2 = np.random.normal(mu_women, std_women, 1000000) - N_gmm_1 = np.concatenate((N_men_1, N_women_1)) - N_gmm_2 = np.concatenate((N_men_2, N_women_2)) - mu_gmm_1 = np.mean(N_gmm_1) - mu_gmm_2 = np.mean(N_gmm_2) - std_gmm = np.std(N_gmm_1) - mm_gender = std_gmm / mu_gmm_1 - var_gmm = np.var(N_gmm_1) - abs_diff_1 = np.abs(mu_gmm_1 - N_gmm_1) - abs_diff_2 = np.mean(np.abs(N_gmm_1 - N_gmm_2)) - mean_deviation_1 = np.mean(abs_diff_1) - mean_deviation_2 = np.mean(abs_diff_2) + N_men = np.random.normal(mu_men, std_men, 10000000) + N_women = np.random.normal(mu_women, std_women, 10000000) + N_gmm = np.concatenate((N_men, N_women)) + perc, _ = np.nanpercentile(N_gmm, [18.5, 81.5]) # Laplace bi => 63% + mu_gmm = np.mean(N_gmm) + bi_gmm = mu_gmm - perc + abs_diff = np.abs(mu_gmm - N_gmm) + + mean_deviation = np.mean(abs_diff) # sns.distplot(N_men, 
hist=False, rug=False, label="Men") # sns.distplot(N_women, hist=False, rug=False, label="Women") # sns.distplot(N_gmm, hist=False, rug=False, label="GMM") @@ -143,15 +138,10 @@ def gmm(): # plt.ylabel("Height distributions of men and women") # plt.legend() # plt.show() - print("Mean of GMM distribution: {:.2f}".format(mu_gmm_1)) - print("Standard deviation: {:.2f}".format(std_gmm)) - print("Relative error (standard deviation) {:.3f} %".format(mm_gender * 100)) - print("Variance: {:.2f}".format(var_gmm)) - print("Mean deviation: {:.2f}".format(mean_deviation_1)) - print("Mean deviation 2: {:.2f}".format(mean_deviation_2)) - print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation_1 / mu_gmm_1) * 100)) - - return mm_gender + print("Mean of GMM distribution: {:.2f}".format(mu_gmm)) + print("+- bi interval (63%) : {:.2f}".format(bi_gmm)) + print("Mean deviation: {:.2f}".format(mean_deviation)) + print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation / mu_gmm) * 100)) def get_confidence(xx, zz, std): @@ -160,4 +150,4 @@ def get_confidence(xx, zz, std): delta_x = std * math.cos(theta) delta_z = std * math.sin(theta) - return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z) \ No newline at end of file + return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z) diff --git a/monoloco/visuals/printer.py b/monoloco/visuals/printer.py index 02f4363..d3fa627 100644 --- a/monoloco/visuals/printer.py +++ b/monoloco/visuals/printer.py @@ -9,8 +9,7 @@ import matplotlib.cm as cm from matplotlib.patches import Ellipse, Circle, Rectangle from mpl_toolkits.axes_grid1 import make_axes_locatable -from ..utils.camera import pixel_to_camera -from ..utils.misc import get_task_error +from ..utils import pixel_to_camera, get_task_error class Printer: diff --git a/monoloco/visuals/results.py b/monoloco/visuals/results.py index 9d1d481..e205f48 100644 --- a/monoloco/visuals/results.py +++ b/monoloco/visuals/results.py @@ 
-1,6 +1,7 @@ # pylint: disable=R0915 import os + import numpy as np import matplotlib.pyplot as plt from matplotlib.patches import Ellipse diff --git a/monoloco/visuals/webcam.py b/monoloco/visuals/webcam.py index af42991..b621e81 100644 --- a/monoloco/visuals/webcam.py +++ b/monoloco/visuals/webcam.py @@ -12,14 +12,11 @@ import torch import matplotlib.pyplot as plt from PIL import Image from openpifpaf import transforms - import cv2 -from ..visuals.printer import Printer -from ..utils.pifpaf import preprocess_pif -from ..predict.pifpaf import PifPaf -from ..predict.network import MonoLoco -from ..predict.factory import factory_for_gt +from ..visuals import Printer +from ..network import PifPaf, MonoLoco +from ..network.process import preprocess_pifpaf, factory_for_gt def webcam(args): @@ -66,7 +63,7 @@ def webcam(args): visualizer_monoloco.send(None) if pifpaf_out: - boxes, keypoints = preprocess_pif(pifpaf_out, (width, height)) + boxes, keypoints = preprocess_pifpaf(pifpaf_out, (width, height)) outputs, varss = monoloco.forward(keypoints, kk) dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt) visualizer_monoloco.send((pil_image, dic_out)) diff --git a/setup.py b/setup.py index 2bf0100..c2b72b0 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,9 @@ setup( version=VERSION, packages=[ 'monoloco', - 'monoloco.train', - 'monoloco.predict', + 'monoloco.network', 'monoloco.eval', + 'monoloco.train', 'monoloco.prep', 'monoloco.visuals', 'monoloco.utils' @@ -28,9 +28,15 @@ setup( install_requires=[ 'openpifpaf', - 'nuscenes-devkit', # for nuScenes dataset preprocessing 'tabulate', # For evaluation - 'pylint', - 'pytest', ], + extras_require={ + 'test': [ + 'pylint', + 'pytest', + ], + 'prep': [ + 'nuscenes-devkit', + ], + }, ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 052eb7e..19865c0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,7 +6,7 @@ sys.path.insert(0, os.path.join('..', 'monoloco')) def test_iou(): - 
from monoloco.utils.iou import get_iou_matrix + from monoloco.utils import get_iou_matrix boxes_pred = [[1, 100, 1, 200]] boxes_gt = [[100., 120., 150., 160.],[12, 110, 130., 160.]] iou_matrix = get_iou_matrix(boxes_pred, boxes_gt) @@ -14,7 +14,7 @@ def test_iou(): def test_pixel_to_camera(): - from monoloco.utils.camera import pixel_to_camera + from monoloco.utils import pixel_to_camera kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] zz = 10 uv_vector = [1000., 400.]