refactor (#8)

* Make imports from __init__ files

* add to __init__ files only classes or utils functions

* refactor packages

* fix pylint cyclic import

* add task error with 63% confidence intervals and MAD

* fix pixel_error

* update setup

* update installation instructions

* update instructions

* update instructions

* update package installation
Lorenzo Bertoni 2019-07-23 15:55:46 +02:00 committed by GitHub
parent 235a90ef47
commit 8366a436ee
36 changed files with 401 additions and 398 deletions


@ -4,10 +4,8 @@ python:
- "3.6"
- "3.7"
install:
- pip install openpifpaf
- pip install nuscenes-devkit
- pip install tabulate
- pip install pylint
- pip install --upgrade pip setuptools
- pip install ".[test]"
script:
- pylint monoloco --disable=unused-variable,fixme
- pytest -vv


@ -29,14 +29,14 @@ A video with qualitative results is available on [YouTube](https://www.youtube.c
Python 3 is required. Python 2 is not supported.
Do not clone this repository and make sure there is no folder named monoloco in your current directory.
`pip install monoloco`
`pip3 install monoloco`
A live demo is available; we recommend trying our **Webcam** functionality. More info is in the webcam section.
For development of the monoloco source code itself, you need to clone this repository and then:
```
pip install openpifpaf nuscenes-devkit tabulate
pip3 install -e '.[test, prep]'
```
Python 3.6 or 3.7 is required for the nuScenes development kit. Python 3 is required for openpifpaf.
All details for the PifPaf pose detector are at [openpifpaf](https://github.com/vita-epfl/openpifpaf).
@ -135,6 +135,7 @@ Multiple visualizations can be combined in different windows.
The above GIF was obtained by running the following command on a MacBook:
`pip3 install opencv-python`
`python3 -m monoloco.run predict --webcam --scale 0.2 --output_types combined --z_max 10 --checkpoint resnet50`
# Preprocess
@ -152,6 +153,8 @@ data/kitti/images`
Download nuScenes dataset from [nuScenes](https://www.nuscenes.org/download) (either Mini or TrainVal),
save it anywhere and soft link it in `data/nuscenes`
nuScenes preprocessing requires `pip3 install nuscenes-devkit`
### Annotations to preprocess
MonoLoco is trained using 2D human pose joints. To create them, run PifPaf over the KITTI or nuScenes training images.


@ -1,4 +1,4 @@
"""Open implementation of MonoLoco."""
__version__ = '0.4.1'
__version__ = '0.4.2'


@ -0,0 +1,4 @@
from .eval_kitti import EvalKitti
from .generate_kitti import GenerateKitti
from .geom_baseline import geometric_baseline


@ -13,10 +13,9 @@ from itertools import chain
from tabulate import tabulate
from ..utils.iou import get_iou_matches
from ..utils.misc import get_task_error, get_pixel_error
from ..utils.kitti import check_conditions, get_category, split_training, parse_ground_truth
from ..visuals.results import print_results
from ..utils import get_iou_matches, get_task_error, get_pixel_error, check_conditions, get_category, split_training, \
parse_ground_truth
from ..visuals import print_results
class EvalKitti:


@ -13,12 +13,10 @@ import copy
import numpy as np
import torch
from ..predict.network import MonoLoco
from ..network import MonoLoco
from ..network.process import preprocess_pifpaf
from ..eval.geom_baseline import compute_distance
from ..utils.kitti import get_calibration
from ..utils.pifpaf import preprocess_pif
from ..utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera
from ..utils.stereo import depth_from_disparity
from ..utils import get_keypoints, pixel_to_camera, xyz_from_distance, get_calibration, depth_from_disparity
class GenerateKitti:
@ -51,7 +49,7 @@ class GenerateKitti:
for basename in self.list_basename:
path_calib = os.path.join(self.dir_kk, basename + '.txt')
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374))
if not keypoints:
cnt_no_file += 1
@ -95,7 +93,7 @@ class GenerateKitti:
for mode in ['left', 'right']:
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename, mode=mode)
boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374))
if not keypoints and mode == 'left':
cnt_no_file += 1


@ -6,7 +6,7 @@ from collections import defaultdict
import numpy as np
from ..utils.camera import pixel_to_camera, get_keypoints
from ..utils import pixel_to_camera, get_keypoints
AVERAGE_Y = 0.48
CLUSTERS = ['10', '20', '30', 'all']


@ -0,0 +1,4 @@
from .pifpaf import PifPaf, ImageList
from .losses import LaplacianLoss
from .net import MonoLoco


@ -1,3 +1,4 @@
import math
import torch
import numpy as np


@ -1,6 +1,6 @@
"""
Monoloco predictor. It receives pifpaf joints and outputs distances
Monoloco class. From 2D joints to real-world distances
"""
import logging
@ -8,10 +8,9 @@ from collections import defaultdict
import torch
from ..utils.iou import get_iou_matches, reorder_matches
from ..utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance
from ..utils.network import get_monoloco_inputs, unnormalize_bi, laplace_sampling
from ..train.architectures import LinearModel
from ..utils import get_iou_matches, reorder_matches, get_keypoints, pixel_to_camera, xyz_from_distance
from .process import preprocess_monoloco, unnormalize_bi, laplace_sampling
from .architectures import LinearModel
class MonoLoco:
@ -43,7 +42,7 @@ class MonoLoco:
return None, None
with torch.no_grad():
inputs = get_monoloco_inputs(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device))
inputs = preprocess_monoloco(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device))
if self.n_dropout > 0:
self.model.dropout.training = True # Manually reactivate dropout in eval
total_outputs = torch.empty((0, inputs.size()[0])).to(self.device)

monoloco/network/process.py (new file, 154 lines)

@ -0,0 +1,154 @@
import json
import numpy as np
import torch
from ..utils import get_keypoints, pixel_to_camera
def preprocess_monoloco(keypoints, kk):
""" Preprocess batches of inputs
keypoints = torch tensors of (m, 3, 17) or list [3,17]
Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box
"""
if isinstance(keypoints, list):
keypoints = torch.tensor(keypoints)
if isinstance(kk, list):
kk = torch.tensor(kk)
# Projection in normalized image coordinates and zero-center with the center of the bounding box
uv_center = get_keypoints(keypoints, mode='center')
xy1_center = pixel_to_camera(uv_center, kk, 10)
xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
# xy1_center[:, 1].fill_(0) #TODO
kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3)
kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view
return kps_out
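As a quick illustration of the normalization above — a sketch, assuming the package is installed; the keypoint values are invented:
```
import torch
from monoloco.network.process import preprocess_monoloco

kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # KITTI intrinsics
keypoints = torch.ones(2, 3, 17)   # 2 people, (u, v, conf) x 17 COCO joints
inputs = preprocess_monoloco(keypoints, kk)
print(inputs.shape)                # torch.Size([2, 34]): zero-centered (x, y) pairs
```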
def factory_for_gt(im_size, name=None, path_gt=None):
"""Look for ground-truth annotations file and define calibration matrix based on image size """
try:
with open(path_gt, 'r') as f:
dic_names = json.load(f)
print('-' * 120 + "\nGround-truth file opened")
except (FileNotFoundError, TypeError):
print('-' * 120 + "\nGround-truth file not found")
dic_names = {}
try:
kk = dic_names[name]['K']
dic_gt = dic_names[name]
print("Matched ground-truth file!")
except KeyError:
dic_gt = None
x_factor = im_size[0] / 1600
y_factor = im_size[1] / 900
pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it
if im_size[0] / im_size[1] > 2.5:
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
else:
kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
[0, 1266.4 * pixel_factor, 491.5 * y_factor],
[0., 0., 1.]] # nuScenes calibration
print("Using a standard calibration matrix...")
return kk, dic_gt
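For example, a hypothetical call with no ground-truth file on disk:
```
from monoloco.network.process import factory_for_gt

kk, dic_gt = factory_for_gt(im_size=(1242, 374), name='000001.png', path_gt=None)
# prints "Ground-truth file not found"; since 1242/374 > 2.5 the KITTI
# calibration matrix is returned and dic_gt is None
```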
def laplace_sampling(outputs, n_samples):
# np.random.seed(1)
mu = outputs[:, 0]
bi = torch.abs(outputs[:, 1])
# Analytical
# uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
# xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))
# Sampling
cuda_check = outputs.is_cuda
if cuda_check:
get_device = outputs.get_device()
device = torch.device(type="cuda", index=get_device)
else:
device = torch.device("cpu")
laplace = torch.distributions.Laplace(mu, bi)
xx = laplace.sample((n_samples,)).to(device)
return xx
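A minimal sampling sketch — the (mu, bi) pairs below are made up:
```
import torch
from monoloco.network.process import laplace_sampling

outputs = torch.tensor([[20.0, 1.5], [9.0, 0.6]])  # per-detection (mu, bi)
samples = laplace_sampling(outputs, n_samples=100)
print(samples.shape)                               # torch.Size([100, 2])
```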
def epistemic_variance(total_outputs):
"""Compute epistemic variance"""
# var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2
var_y = np.var(total_outputs, axis=0)
lower_b = np.quantile(a=total_outputs, q=0.25, axis=0)
upper_b = np.quantile(a=total_outputs, q=0.75, axis=0)
var_new = (upper_b - lower_b)
return var_y, var_new
def unnormalize_bi(outputs):
"""Unnormalize relative bi of a nunmpy array"""
outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
return outputs
def preprocess_pifpaf(annotations, im_size=None):
"""
Preprocess pif annotations:
1. enlarge the box of 10%
2. Constraint it inside the image (if image_size provided)
"""
boxes = []
keypoints = []
for dic in annotations:
box = dic['bbox']
if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0)
return [], []
kps = prepare_pif_kps(dic['keypoints'])
conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints
# Add 15% for y and 20% for x
delta_h = (box[3] - box[1]) / 7
delta_w = (box[2] - box[0]) / 3.5
assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
box[0] -= delta_w
box[1] -= delta_h
box[2] += delta_w
box[3] += delta_h
# Put the box inside the image
if im_size is not None:
box[0] = max(0, box[0])
box[1] = max(0, box[1])
box[2] = min(box[2], im_size[0])
box[3] = min(box[3], im_size[1])
box.append(conf)
boxes.append(box)
keypoints.append(kps)
return boxes, keypoints
def prepare_pif_kps(kps_in):
"""Convert from a list of 51 to a list of 3, 17"""
assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
xxs = kps_in[0:][::3]
yys = kps_in[1:][::3] # from offset 1 every 3
ccs = kps_in[2:][::3]
return [xxs, yys, ccs]
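Putting the helpers of this new module together, a toy end-to-end run (annotation values invented, package assumed installed):
```
from monoloco.network.process import preprocess_pifpaf, preprocess_monoloco

annotation = {'bbox': [100., 50., 200., 350.],      # x1, y1, x2, y2
              'keypoints': [150., 60., 0.9] * 17}   # flat 51-value COCO list
boxes, keypoints = preprocess_pifpaf([annotation], im_size=(1242, 374))
# boxes[0]: enlarged, clipped to the image, confidence appended as 5th entry
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]
inputs = preprocess_monoloco(keypoints, kk)         # (1, 34) network input
```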

monoloco/predict.py (new file, 123 lines)

@ -0,0 +1,123 @@
import os
import json
import torch
from PIL import Image
from openpifpaf import show
from .visuals.printer import Printer
from .network import PifPaf, ImageList, MonoLoco
from .network.process import factory_for_gt, preprocess_pifpaf
def predict(args):
cnt = 0
# load pifpaf and monoloco models
pifpaf = PifPaf(args)
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
# data
data = ImageList(args.images, scale=args.scale)
data_loader = torch.utils.data.DataLoader(
data, batch_size=1, shuffle=False,
pin_memory=args.pin_memory, num_workers=args.loader_workers)
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
images = image_tensors.permute(0, 2, 3, 1)
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
fields_batch = pifpaf.fields(processed_images)
# unbatch
for image_path, image, processed_image_cpu, fields in zip(
image_paths, images, processed_images_cpu, fields_batch):
if args.output_directory is None:
output_path = image_path
else:
file_name = os.path.basename(image_path)
output_path = os.path.join(args.output_directory, file_name)
print('image', idx, image_path, output_path)
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
if 'monoloco' in args.networks:
im_size = (float(image.size()[1] / args.scale),
float(image.size()[0] / args.scale)) # Width, Height (original)
# Extract calibration matrix and ground truth file if present
with open(image_path, 'rb') as f:
pil_image = Image.open(f).convert('RGB')
images_outputs.append(pil_image)
im_name = os.path.basename(image_path)
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
# Preprocess pifpaf outputs and run monoloco
boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size)
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt)
else:
dic_out = None
kk = None
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
print('Image {}\n'.format(cnt) + '-' * 120)
cnt += 1
def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
"""Output json files or images according to the choice"""
# Save json file
if 'pifpaf' in args.networks:
keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
# Visualizer
keypoint_painter = show.KeypointPainter(show_box=False)
skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
markersize=1, linewidth=4)
if 'json' in args.output_types and keypoint_sets.size > 0:
with open(output_path + '.pifpaf.json', 'w') as f:
json.dump(pifpaf_out, f)
if 'keypoints' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.keypoints.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
keypoint_painter.keypoints(ax, keypoint_sets)
if 'skeleton' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.skeleton.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
if 'monoloco' in args.networks:
if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
epistemic = False
if args.n_dropout > 0:
epistemic = True
if dic_out['boxes']: # Only print in case of detections
printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types
, z_max=args.z_max, epistemic=epistemic)
figures, axes = printer.factory_axes()
printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box,
save=True, show=args.show)
if 'json' in args.output_types:
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
json.dump(dic_out, ff)


@ -1,87 +0,0 @@
import json
import os
from openpifpaf import show
from ..visuals.printer import Printer
def factory_for_gt(im_size, name=None, path_gt=None):
"""Look for ground-truth annotations file and define calibration matrix based on image size """
try:
with open(path_gt, 'r') as f:
dic_names = json.load(f)
print('-' * 120 + "\nGround-truth file opened")
except (FileNotFoundError, TypeError):
print('-' * 120 + "\nGround-truth file not found")
dic_names = {}
try:
kk = dic_names[name]['K']
dic_gt = dic_names[name]
print("Matched ground-truth file!")
except KeyError:
dic_gt = None
x_factor = im_size[0] / 1600
y_factor = im_size[1] / 900
pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it
if im_size[0] / im_size[1] > 2.5:
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
else:
kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
[0, 1266.4 * pixel_factor, 491.5 * y_factor],
[0., 0., 1.]] # nuScenes calibration
print("Using a standard calibration matrix...")
return kk, dic_gt
def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
"""Output json files or images according to the choice"""
# Save json file
if 'pifpaf' in args.networks:
keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
# Visualizer
keypoint_painter = show.KeypointPainter(show_box=False)
skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
markersize=1, linewidth=4)
if 'json' in args.output_types and keypoint_sets.size > 0:
with open(output_path + '.pifpaf.json', 'w') as f:
json.dump(pifpaf_out, f)
if 'keypoints' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.keypoints.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
keypoint_painter.keypoints(ax, keypoint_sets)
if 'skeleton' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.skeleton.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
if 'monoloco' in args.networks:
if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
epistemic = False
if args.n_dropout > 0:
epistemic = True
if dic_out['boxes']: # Only print in case of detections
printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types
, z_max=args.z_max, epistemic=epistemic)
figures, axes = printer.factory_axes()
printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box,
save=True, show=args.show)
if 'json' in args.output_types:
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
json.dump(dic_out, ff)


@ -1,72 +0,0 @@
import os
from PIL import Image
import torch
from ..predict.pifpaf import PifPaf, ImageList
from ..predict.network import MonoLoco
from ..predict.factory import factory_for_gt, factory_outputs
from ..utils.pifpaf import preprocess_pif
def predict(args):
cnt = 0
# load pifpaf and monoloco models
pifpaf = PifPaf(args)
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
# data
data = ImageList(args.images, scale=args.scale)
data_loader = torch.utils.data.DataLoader(
data, batch_size=1, shuffle=False,
pin_memory=args.pin_memory, num_workers=args.loader_workers)
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
images = image_tensors.permute(0, 2, 3, 1)
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
fields_batch = pifpaf.fields(processed_images)
# unbatch
for image_path, image, processed_image_cpu, fields in zip(
image_paths, images, processed_images_cpu, fields_batch):
if args.output_directory is None:
output_path = image_path
else:
file_name = os.path.basename(image_path)
output_path = os.path.join(args.output_directory, file_name)
print('image', idx, image_path, output_path)
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
if 'monoloco' in args.networks:
im_size = (float(image.size()[1] / args.scale),
float(image.size()[0] / args.scale)) # Width, Height (original)
# Extract calibration matrix and ground truth file if present
with open(image_path, 'rb') as f:
pil_image = Image.open(f).convert('RGB')
images_outputs.append(pil_image)
im_name = os.path.basename(image_path)
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
# Preprocess pifpaf outputs and run monoloco
boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt)
else:
dic_out = None
kk = None
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
print('Image {}\n'.format(cnt) + '-' * 120)
cnt += 1


@ -0,0 +1,3 @@
from .preprocess_nu import PreprocessNuscenes
from .preprocess_ki import PreprocessKitti


@ -8,12 +8,9 @@ from collections import defaultdict
import json
import datetime
from ..prep.transforms import transform_keypoints
from ..utils.kitti import get_calibration, split_training, parse_ground_truth
from ..utils.network import get_monoloco_inputs
from ..utils.pifpaf import preprocess_pif
from ..utils.iou import get_iou_matches
from ..utils.misc import append_cluster
from .transforms import transform_keypoints
from ..utils import get_calibration, split_training, parse_ground_truth, get_iou_matches, append_cluster
from ..network.process import preprocess_pifpaf, preprocess_monoloco
class PreprocessKitti:
@ -84,10 +81,10 @@ class PreprocessKitti:
try:
with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f:
annotations = json.load(f)
boxes, keypoints = preprocess_pif(annotations, im_size=(1238, 374))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1238, 374))
keypoints_hflip = transform_keypoints(keypoints, mode='flip')
inputs = get_monoloco_inputs(keypoints, kk).tolist()
inputs_hflip = get_monoloco_inputs(keypoints, kk).tolist()
inputs = preprocess_monoloco(keypoints, kk).tolist()
inputs_hflip = preprocess_monoloco(keypoints, kk).tolist()
all_keypoints = [keypoints, keypoints_hflip]
all_inputs = [inputs, inputs_hflip]


@ -10,16 +10,11 @@ from collections import defaultdict
import datetime
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from ..utils.iou import get_iou_matches
from ..utils.misc import append_cluster
from ..utils.nuscenes import select_categories
from ..utils.camera import project_3d
from ..utils.pifpaf import preprocess_pif
from ..utils.network import get_monoloco_inputs
from ..utils import get_iou_matches, append_cluster, select_categories, project_3d
from ..network.process import preprocess_pifpaf, preprocess_monoloco
class PreprocessNuscenes:
@ -97,12 +92,12 @@ class PreprocessNuscenes:
if exists:
with open(path_pif, 'r') as file:
annotations = json.load(file)
boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
else:
continue
if keypoints:
inputs = get_monoloco_inputs(keypoints, kk).tolist()
inputs = preprocess_monoloco(keypoints, kk).tolist()
matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
for (idx, idx_gt) in matches:


@ -1,20 +1,11 @@
# pylint: skip-file
import argparse
from openpifpaf.network import nets
from openpifpaf import decoder
from .prep.preprocess_nu import PreprocessNuscenes
from .prep.preprocess_ki import PreprocessKitti
from .predict.predict import predict
from .train.trainer import Trainer
from .eval.generate_kitti import GenerateKitti
from .eval.geom_baseline import geometric_baseline
from .train.hyp_tuning import HypTuning
from .eval.eval_kitti import EvalKitti
from .visuals.webcam import webcam
def cli():
@ -105,28 +96,33 @@ def cli():
def main():
args = cli()
if args.command == 'predict':
if args.webcam:
from .visuals.webcam import webcam
webcam(args)
else:
from .predict import predict
predict(args)
elif args.command == 'prep':
if 'nuscenes' in args.dataset:
from .prep import PreprocessNuscenes
prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset, args.iou_min)
prep.run()
if 'kitti' in args.dataset:
from .prep import PreprocessKitti
prep = PreprocessKitti(args.dir_ann, args.iou_min)
prep.run()
elif args.command == 'train':
from .train import HypTuning
if args.hyp:
hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs,
baseline=args.baseline, dropout=args.dropout,
multiplier=args.multiplier, r_seed=args.r_seed)
hyp_tuning.train()
else:
from .train import Trainer
training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs,
baseline=args.baseline, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step,
n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size,
@ -137,20 +133,24 @@ def main():
elif args.command == 'eval':
if args.geometric:
from .eval import geometric_baseline
geometric_baseline(args.joints)
if args.generate:
from .eval import GenerateKitti
kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout)
kitti_txt.run_mono()
if args.stereo:
kitti_txt.run_stereo()
if args.dataset == 'kitti':
from .eval import EvalKitti
kitti_eval = EvalKitti(verbose=args.verbose, stereo=args.stereo)
kitti_eval.run()
kitti_eval.printer(show=args.show)
if 'nuscenes' in args.dataset:
from .train import Trainer
training = Trainer(joints=args.joints)
_ = training.evaluate(load=True, model=args.model, debug=False)
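Note the pattern in the dispatch above: every command branch imports its own module locally, which is what resolves the pylint cyclic-import warning from the commit message. Distilled into a standalone sketch (module names hypothetical):
```
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('command', choices=['predict', 'train'])
    args = parser.parse_args()
    if args.command == 'predict':
        import predict            # hypothetical module, imported only when
        predict.run(args)         # this branch runs: no cycle at load time
    elif args.command == 'train':
        import train
        train.run(args)

if __name__ == '__main__':
    main()
```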


@ -0,0 +1,3 @@
from .hyp_tuning import HypTuning
from .trainer import Trainer


@ -1,4 +1,4 @@
# pylint: skip-file # TODO
# pylint: skip-file # TODO make train file and class trainer and
"""
Training and evaluation of a neural network which predicts 3D localization and confidence intervals
@ -19,11 +19,11 @@ import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from .datasets import KeypointsDataset
from .architectures import LinearModel
from .losses import LaplacianLoss
from ..utils.logs import set_logger
from ..utils.network import laplace_sampling, unnormalize_bi
from train.datasets import KeypointsDataset
from ..network import LaplacianLoss
from ..network.process import laplace_sampling, unnormalize_bi
from ..network.architectures import LinearModel
from ..utils import set_logger
class Trainer:


@ -0,0 +1,8 @@
from .iou import get_iou_matches, reorder_matches, get_iou_matrix
from .misc import get_task_error, get_pixel_error, append_cluster
from .kitti import check_conditions, get_category, split_training, parse_ground_truth, get_calibration
from .camera import xyz_from_distance, get_keypoints, pixel_to_camera, project_3d
from .logs import set_logger
from .stereo import depth_from_disparity
from ..utils.nuscenes import select_categories


@ -1,6 +1,4 @@
import random
def append_cluster(dic_jo, phase, xx, dd, kps):
"""Append the annotation based on its distance"""
@ -27,20 +25,19 @@ def append_cluster(dic_jo, phase, xx, dd, kps):
def get_task_error(dd, mode='std'):
"""Get target error not knowing the gender"""
"""Get target error not knowing the gender, modeled through a Gaussian Mixure model"""
assert mode in ('std', 'mad')
h_mean = 171.5 # average h of the human distribution
if mode == 'std':
mm_gender = 0.0557
elif mode == 'mad': # mean absolute deviation
mm_gender = 0.0457
return mm_gender * dd
delta_h = 9.07 # delta h for 63% confidence interval
elif mode == 'mad':
delta_h = 7.83 # delta_h of mean absolute deviation
return dd * (1 - h_mean / (h_mean + delta_h))
def get_pixel_error(dd_gt, zz_gt):
"""calculate error in stereo distance due to +-1 pixel mismatch (function of depth)"""
"""calculate error in stereo distance due to 1 pixel mismatch (function of depth)"""
disp = 0.54 * 721 / zz_gt
random.seed(1)
sign = random.choice((-1, 1))
delta_z = zz_gt - 0.54 * 721 / (disp + sign)
delta_z = zz_gt - 0.54 * 721 / (disp - 1)
return dd_gt + delta_z
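As a sanity check (not part of the commit), plugging in 20 m: the 63% task-error interval is about ±1 m, and a 1-pixel disparity slip moves the stereo depth by a similar amount:
```
h_mean, delta_h = 171.5, 9.07                      # cm, values from get_task_error
dd = zz_gt = 20.0
task_err = dd * (1 - h_mean / (h_mean + delta_h))  # ~1.00 m

disp = 0.54 * 721 / zz_gt                          # KITTI baseline (m) * focal (px)
delta_z = zz_gt - 0.54 * 721 / (disp - 1)          # ~-1.08 m
print(round(task_err, 2), round(delta_z, 2))
```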


@ -1,67 +0,0 @@
import numpy as np
import torch
from ..utils.camera import get_keypoints, pixel_to_camera
def get_monoloco_inputs(keypoints, kk):
""" Preprocess batches of inputs
keypoints = torch tensors of (m, 3, 17) or list [3,17]
Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box
"""
if isinstance(keypoints, list):
keypoints = torch.tensor(keypoints)
if isinstance(kk, list):
kk = torch.tensor(kk)
# Projection in normalized image coordinates and zero-center with the center of the bounding box
uv_center = get_keypoints(keypoints, mode='center')
xy1_center = pixel_to_camera(uv_center, kk, 10)
xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
# xy1_center[:, 1].fill_(0) #TODO
kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3)
kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view
return kps_out
def laplace_sampling(outputs, n_samples):
# np.random.seed(1)
mu = outputs[:, 0]
bi = torch.abs(outputs[:, 1])
# Analytical
# uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
# xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))
# Sampling
cuda_check = outputs.is_cuda
if cuda_check:
get_device = outputs.get_device()
device = torch.device(type="cuda", index=get_device)
else:
device = torch.device("cpu")
laplace = torch.distributions.Laplace(mu, bi)
xx = laplace.sample((n_samples,)).to(device)
return xx
def epistemic_variance(total_outputs):
"""Compute epistemic variance"""
# var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2
var_y = np.var(total_outputs, axis=0)
lower_b = np.quantile(a=total_outputs, q=0.25, axis=0)
upper_b = np.quantile(a=total_outputs, q=0.75, axis=0)
var_new = (upper_b - lower_b)
return var_y, var_new
def unnormalize_bi(outputs):
"""Unnormalize relative bi of a nunmpy array"""
outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
return outputs


@ -2,6 +2,7 @@
import random
import json
import os
import numpy as np


@ -1,54 +0,0 @@
import numpy as np
def preprocess_pif(annotations, im_size=None):
"""
Preprocess pif annotations:
1. enlarge the box of 10%
2. Constraint it inside the image (if image_size provided)
"""
boxes = []
keypoints = []
for dic in annotations:
box = dic['bbox']
if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0)
return [], []
kps = prepare_pif_kps(dic['keypoints'])
conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints
# Add 15% for y and 20% for x
delta_h = (box[3] - box[1]) / 7
delta_w = (box[2] - box[0]) / 3.5
assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
box[0] -= delta_w
box[1] -= delta_h
box[2] += delta_w
box[3] += delta_h
# Put the box inside the image
if im_size is not None:
box[0] = max(0, box[0])
box[1] = max(0, box[1])
box[2] = min(box[2], im_size[0])
box[3] = min(box[3], im_size[1])
box.append(conf)
boxes.append(box)
keypoints.append(kps)
return boxes, keypoints
def prepare_pif_kps(kps_in):
"""Convert from a list of 51 to a list of 3, 17"""
assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
xxs = kps_in[0:][::3]
yys = kps_in[1:][::3] # from offset 1 every 3
ccs = kps_in[2:][::3]
return [xxs, yys, ccs]


@ -49,7 +49,7 @@ def filter_disparities(kps, kps_right_list, idx, expected_disps):
disparity_y_conf = np.where(mask_conf, disparity_y, np.nan)
# Mask outliers using iqr
mask_outlier = get_iqr_mask(disparity_x_conf)
mask_outlier = interquartile_mask(disparity_x_conf)
disparity_x_mask = np.where(mask_outlier, disparity_x_conf, np.nan)
disparity_y_mask = np.where(mask_outlier, disparity_y_conf, np.nan)
avg_disparity_x = np.nanmedian(disparity_x_mask, axis=1) # ignore the nan
@ -79,7 +79,7 @@ def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y):
return False
def get_iqr_mask(distribution):
def interquartile_mask(distribution):
quartile_1, quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1)
iqr = quartile_3 - quartile_1
lower_bound = quartile_1 - (iqr * 1.5)


@ -0,0 +1,3 @@
from .printer import Printer
from .results import print_results


@ -1,11 +1,12 @@
# pylint: skip-file
import numpy as np
import os
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from visuals.printer import get_angle
from .printer import get_angle
def paper():
@ -112,30 +113,24 @@ def paper():
plt.close()
def target_error(xx, mm):
return mm * xx
def gmm():
mu_men = 178
std_men = 7
mu_women = 165
std_women = 7
N_men_1 = np.random.normal(mu_men, std_men, 1000000)
N_men_2 = np.random.normal(mu_men, std_men, 1000000)
N_women_1 = np.random.normal(mu_women, std_women, 1000000)
N_women_2 = np.random.normal(mu_women, std_women, 1000000)
N_gmm_1 = np.concatenate((N_men_1, N_women_1))
N_gmm_2 = np.concatenate((N_men_2, N_women_2))
mu_gmm_1 = np.mean(N_gmm_1)
mu_gmm_2 = np.mean(N_gmm_2)
std_gmm = np.std(N_gmm_1)
mm_gender = std_gmm / mu_gmm_1
var_gmm = np.var(N_gmm_1)
abs_diff_1 = np.abs(mu_gmm_1 - N_gmm_1)
abs_diff_2 = np.mean(np.abs(N_gmm_1 - N_gmm_2))
mean_deviation_1 = np.mean(abs_diff_1)
mean_deviation_2 = np.mean(abs_diff_2)
N_men = np.random.normal(mu_men, std_men, 10000000)
N_women = np.random.normal(mu_women, std_women, 10000000)
N_gmm = np.concatenate((N_men, N_women))
perc, _ = np.nanpercentile(N_gmm, [18.5, 81.5]) # Laplace bi => 63%
mu_gmm = np.mean(N_gmm)
bi_gmm = mu_gmm - perc
abs_diff = np.abs(mu_gmm - N_gmm)
mean_deviation = np.mean(abs_diff)
# sns.distplot(N_men, hist=False, rug=False, label="Men")
# sns.distplot(N_women, hist=False, rug=False, label="Women")
# sns.distplot(N_gmm, hist=False, rug=False, label="GMM")
@ -143,15 +138,10 @@ def gmm():
# plt.ylabel("Height distributions of men and women")
# plt.legend()
# plt.show()
print("Mean of GMM distribution: {:.2f}".format(mu_gmm_1))
print("Standard deviation: {:.2f}".format(std_gmm))
print("Relative error (standard deviation) {:.3f} %".format(mm_gender * 100))
print("Variance: {:.2f}".format(var_gmm))
print("Mean deviation: {:.2f}".format(mean_deviation_1))
print("Mean deviation 2: {:.2f}".format(mean_deviation_2))
print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation_1 / mu_gmm_1) * 100))
return mm_gender
print("Mean of GMM distribution: {:.2f}".format(mu_gmm))
print("+- bi interval (63%) : {:.2f}".format(bi_gmm))
print("Mean deviation: {:.2f}".format(mean_deviation))
print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation / mu_gmm) * 100))
def get_confidence(xx, zz, std):
@ -160,4 +150,4 @@ def get_confidence(xx, zz, std):
delta_x = std * math.cos(theta)
delta_z = std * math.sin(theta)
return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z)
return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z)


@ -9,8 +9,7 @@ import matplotlib.cm as cm
from matplotlib.patches import Ellipse, Circle, Rectangle
from mpl_toolkits.axes_grid1 import make_axes_locatable
from ..utils.camera import pixel_to_camera
from ..utils.misc import get_task_error
from ..utils import pixel_to_camera, get_task_error
class Printer:


@ -1,6 +1,7 @@
# pylint: disable=R0915
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse


@ -12,14 +12,11 @@ import torch
import matplotlib.pyplot as plt
from PIL import Image
from openpifpaf import transforms
import cv2
from ..visuals.printer import Printer
from ..utils.pifpaf import preprocess_pif
from ..predict.pifpaf import PifPaf
from ..predict.network import MonoLoco
from ..predict.factory import factory_for_gt
from ..visuals import Printer
from ..network import PifPaf, MonoLoco
from ..network.process import preprocess_pifpaf, factory_for_gt
def webcam(args):
@ -66,7 +63,7 @@ def webcam(args):
visualizer_monoloco.send(None)
if pifpaf_out:
boxes, keypoints = preprocess_pif(pifpaf_out, (width, height))
boxes, keypoints = preprocess_pifpaf(pifpaf_out, (width, height))
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt)
visualizer_monoloco.send((pil_image, dic_out))


@ -10,9 +10,9 @@ setup(
version=VERSION,
packages=[
'monoloco',
'monoloco.train',
'monoloco.predict',
'monoloco.network',
'monoloco.eval',
'monoloco.train',
'monoloco.prep',
'monoloco.visuals',
'monoloco.utils'
@ -28,9 +28,15 @@ setup(
install_requires=[
'openpifpaf',
'nuscenes-devkit', # for nuScenes dataset preprocessing
'tabulate', # For evaluation
'pylint',
'pytest',
],
extras_require={
'test': [
'pylint',
'pytest',
],
'prep': [
'nuscenes-devkit',
],
},
)
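With this split, the base install stays lean and the heavier dependencies become opt-in extras; these are the commands the updated README and .travis.yml rely on:
```
pip3 install monoloco              # runtime dependencies only
pip3 install '.[test]'             # adds pylint and pytest (used by CI)
pip3 install -e '.[test, prep]'    # development install, also pulls nuscenes-devkit
```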


@ -6,7 +6,7 @@ sys.path.insert(0, os.path.join('..', 'monoloco'))
def test_iou():
from monoloco.utils.iou import get_iou_matrix
from monoloco.utils import get_iou_matrix
boxes_pred = [[1, 100, 1, 200]]
boxes_gt = [[100., 120., 150., 160.],[12, 110, 130., 160.]]
iou_matrix = get_iou_matrix(boxes_pred, boxes_gt)
@ -14,7 +14,7 @@ def test_iou():
def test_pixel_to_camera():
from monoloco.utils.camera import pixel_to_camera
from monoloco.utils import pixel_to_camera
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]
zz = 10
uv_vector = [1000., 400.]