refactor predict (#1)
* add stereo arg (#1)
* stereo skeleton
* skeleton stereo (#2)
* skeleton stereo (#3)
* refactor for stereo
* stereo running
* modify stereo parameters
* remove warnings as errors
* add p2_right
* add factory file
* add p2_right
* add scaling factor to intrinsic matrix
* Refactor predict
* refactor predict 2
* temp
* add uppercase constants
* add uppercase constants
* working predict
* add todo
* Add person_sitting as option to uncomment
* turn off stereo
* remove stereo
* pylint corrections
* add break point
* temp
* fix small bug
* Add compatibility for pifpaf and monoloco networks
* pylint fix
Parent: d941a5bdc7
Commit: 86438189a7
BIN  docs/002282.png.combined.png (new binary file, not shown). After: 694 KiB
@@ -1,16 +1,22 @@
"""Run monoloco over all the pifpaf joints of KITTI images
and extract and save the annotations in txt files"""

import torch
import math
import numpy as np
import os
import glob
import json
import logging

import numpy as np
import torch

from models.architectures import LinearModel
from utils.misc import laplace_sampling
from utils.kitti import eval_geometric, get_calibration
from utils.normalize import unnormalize_bi
from utils.pifpaf import get_input_data, preprocess_pif
from utils.camera import get_depth_from_distance


class RunKitti:
@@ -23,22 +29,17 @@ class RunKitti:
    average_y = 0.48
    n_samples = 100

    def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout, stereo=False):
    def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout):

        # Set directories
        assert dir_ann, "Annotations folder is required"
        self.dir_ann = dir_ann
        self.n_dropout = n_dropout

        list_ann = glob.glob(os.path.join(dir_ann, '*.json'))
        self.dir_kk = os.path.join('data', 'kitti', 'calib')
        self.dir_out = os.path.join('data', 'kitti', 'monoloco')
        if not os.path.exists(self.dir_out):
            os.makedirs(self.dir_out)
            print("Created output directory for txt files")

        self.list_basename = [os.path.basename(x).split('.')[0] for x in list_ann]
        assert self.list_basename, " Missing json annotations file to create txt files for KITTI datasets"
        self.list_basename = factory_basename(dir_ann)

        # Load the model
        input_size = 17 * 2
@@ -54,86 +55,131 @@ class RunKitti:

        # Run inference
        for basename in self.list_basename:
            path_calib = os.path.join(self.dir_kk, basename + '.txt')
            kk, tt = get_calibration(path_calib)

            path_ann = os.path.join(self.dir_ann, basename + '.png.pifpaf.json')
            with open(path_ann, 'r') as f:
                annotations = json.load(f)
            path_calib = os.path.join(self.dir_kk, basename + '.txt')

            annotations, kk, tt, _ = factory_file(path_calib, self.dir_ann, basename)

            boxes, keypoints = preprocess_pif(annotations)
            (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = get_input_data(boxes, keypoints, kk)

            dds_geom, xy_centers = eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48)

            # Update counting
            self.cnt_ann += len(boxes)

            inputs = torch.from_numpy(np.array(inputs)).float().to(self.device)

            if len(inputs) == 0:
            if not inputs:
                self.cnt_no_file += 1

            else:
                self.cnt_file += 1

                if self.n_dropout > 0:
                    total_outputs = torch.empty((0, len(uv_boxes))).to(self.device)
                    self.model.dropout.training = True
                    for ii in range(self.n_dropout):
                        outputs = self.model(inputs)
                        outputs = unnormalize_bi(outputs)
                        samples = laplace_sampling(outputs, self.n_samples)
                        total_outputs = torch.cat((total_outputs, samples), 0)
                    varss = total_outputs.std(0)
                # Run the model
                inputs = torch.from_numpy(np.array(inputs)).float().to(self.device)
                if self.n_dropout > 0:
                    total_outputs = torch.empty((0, len(uv_boxes))).to(self.device)
                    self.model.dropout.training = True
                    for _ in range(self.n_dropout):
                        outputs = self.model(inputs)
                        outputs = unnormalize_bi(outputs)
                        samples = laplace_sampling(outputs, self.n_samples)
                        total_outputs = torch.cat((total_outputs, samples), 0)
                    varss = total_outputs.std(0)

                else:
                    varss = [0]*len(uv_boxes)
                else:
                    varss = [0]*len(uv_boxes)

                # Don't use dropout for the mean prediction and aleatoric uncertainty
                self.model.dropout.training = False
                outputs_net = self.model(inputs)
                outputs = outputs_net.cpu().detach().numpy()
                # Don't use dropout for the mean prediction and aleatoric uncertainty
                self.model.dropout.training = False
                outputs_net = self.model(inputs)
                outputs = outputs_net.cpu().detach().numpy()

                path_txt = os.path.join(self.dir_out, basename + '.txt')
                with open(path_txt, "w+") as ff:
                    for idx in range(outputs.shape[0]):
                        xx_1 = float(xy_centers[idx][0])
                        yy_1 = float(xy_centers[idx][1])
                        xy_kp = xy_kps[idx]
                        dd = float(outputs[idx][0])
                        std_ale = math.exp(float(outputs[idx][1])) * dd
                list_zzs = get_depth_from_distance(outputs, xy_centers)
                all_outputs = [outputs, varss, dds_geom]
                all_inputs = [uv_boxes, xy_centers, xy_kps]
                all_params = [kk, tt]

                        zz = dd / math.sqrt(1 + xx_1**2 + yy_1**2)
                        xx_cam_0 = xx_1*zz + tt[0]  # Still to verify the sign but negligible
                        yy_cam_0 = yy_1*zz + tt[1]
                        zz_cam_0 = zz + tt[2]
                        dd_cam_0 = math.sqrt(xx_cam_0**2 + yy_cam_0**2 + zz_cam_0**2)

                        uv_box = uv_boxes[idx]

                        twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3],
                                                              xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0,
                                                              std_ale, varss[idx], uv_box[4], dds_geom[idx]]]

                        keypoints_str = ["%.5f" % vv for vv in xy_kp]
                        for item in twodecimals:
                            ff.write("%s " % item)
                        for item in keypoints_str:
                            ff.write("%s " % item)
                        ff.write("\n")

                    # Save intrinsic matrix in the last row
                    kk_list = kk.reshape(-1,).tolist()
                    for kk_el in kk_list:
                        ff.write("%f " % kk_el)
                    ff.write("\n")
                # Save the file
                all_outputs.append(list_zzs)
                path_txt = os.path.join(self.dir_out, basename + '.txt')
                save_txts(path_txt, all_inputs, all_outputs, all_params)
                aa = 5

        # Print statistics
        print("Saved in {} txt {} annotations. Not found {} images"
              .format(self.cnt_file, self.cnt_ann, self.cnt_no_file))


def save_txts(path_txt, all_inputs, all_outputs, all_params):

    outputs, varss, dds_geom, zzs = all_outputs[:]
    uv_boxes, xy_centers, xy_kps = all_inputs[:]
    kk, tt = all_params[:]

    with open(path_txt, "w+") as ff:
        for idx in range(outputs.shape[0]):
            xx_1 = float(xy_centers[idx][0])
            yy_1 = float(xy_centers[idx][1])
            xy_kp = xy_kps[idx]
            dd = float(outputs[idx][0])
            std_ale = math.exp(float(outputs[idx][1])) * dd
            zz = zzs[idx]
            xx_cam_0 = xx_1 * zz + tt[0]
            yy_cam_0 = yy_1 * zz + tt[1]
            zz_cam_0 = zz + tt[2]
            dd_cam_0 = math.sqrt(xx_cam_0 ** 2 + yy_cam_0 ** 2 + zz_cam_0 ** 2)

            uv_box = uv_boxes[idx]

            twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3],
                                                  xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0,
                                                  std_ale, varss[idx], uv_box[4], dds_geom[idx]]]

            keypoints_str = ["%.5f" % vv for vv in xy_kp]
            for item in twodecimals:
                ff.write("%s " % item)
            for item in keypoints_str:
                ff.write("%s " % item)
            ff.write("\n")

        # Save intrinsic matrix in the last row
        kk_list = kk.reshape(-1, ).tolist()
        for kk_el in kk_list:
            ff.write("%f " % kk_el)
        ff.write("\n")


def factory_basename(dir_ann):
    """ Return all the basenames in the annotations folder"""

    list_ann = glob.glob(os.path.join(dir_ann, '*.json'))
    list_basename = [os.path.basename(x).split('.')[0] for x in list_ann]
    assert list_basename, " Missing json annotations file to create txt files for KITTI datasets"

    return list_basename


def factory_file(path_calib, dir_ann, basename, ite=0):
    """Choose the annotation and the calibration files. Stereo option with ite = 1"""

    stereo_file = True
    p_left, p_right = get_calibration(path_calib)

    if ite == 0:
        kk, tt = p_left[:]
        path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json')
    else:
        kk, tt = p_right[:]
        path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json')

    try:
        with open(path_ann, 'r') as f:
            annotations = json.load(f)
    except FileNotFoundError:
        annotations = None
        if ite == 1:
            stereo_file = False

    return annotations, kk, tt, stereo_file
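Note (not part of the diff): each detection row written by save_txts follows a fixed layout. Twelve floats come first, the 2D box [x1, y1, x2, y2], the camera-frame coordinates and distance (xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0), the aleatoric std, the epistemic spread, the box confidence, and the geometric-baseline distance, then the flattened keypoints, with the flattened intrinsic matrix as the last row. A minimal reader sketch under those assumptions (the function name is illustrative):

def read_monoloco_txt(path_txt):
    """Hypothetical parser for the txt layout produced by save_txts above."""
    with open(path_txt, 'r') as ff:
        rows = [line.split() for line in ff if line.strip()]
    kk = [float(el) for el in rows[-1]]  # last row: flattened 3x3 intrinsic matrix
    detections = []
    for row in rows[:-1]:
        vals = [float(el) for el in row]
        detections.append({'box': vals[0:4],         # [x1, y1, x2, y2]
                           'xyz': vals[4:7],         # xx_cam_0, yy_cam_0, zz_cam_0
                           'dd': vals[7],            # predicted distance dd_cam_0
                           'std_ale': vals[8],       # aleatoric uncertainty
                           'var_epi': vals[9],       # epistemic spread (0 without dropout)
                           'conf': vals[10],         # detection confidence (uv_box[4])
                           'dd_geom': vals[11],      # geometric baseline distance
                           'keypoints': vals[12:]})  # flattened keypoints
    return kk, detections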
@@ -64,7 +64,8 @@ class PreprocessKitti:

        # Extract keypoints
        path_txt = os.path.join(self.dir_kk, basename + '.txt')
        kk, tt = get_calibration(path_txt)
        p_left, _ = get_calibration(path_txt)
        kk = p_left[0]

        # Iterate over each line of the gt file and save box location and distances
        boxes_gt, dds_gt, _, _ = parse_ground_truth(path_gt)
@@ -9,7 +9,7 @@ from openpifpaf.network import nets
from openpifpaf import decoder
from features.preprocess_nu import PreprocessNuscenes
from features.preprocess_ki import PreprocessKitti
from predict.predict_2d_3d import predict
from predict.predict import predict
from models.trainer import Trainer
from eval.run_kitti import RunKitti
from eval.geom_baseline import GeomBaseline
src/predict/factory.py (new file, 91 lines)
@@ -0,0 +1,91 @@
import json
import os
from visuals.printer import Printer
from openpifpaf import show

from PIL import Image


def factory_for_gt(image, name=None, path_gt=None):
    """Look for ground-truth annotations file and define calibration matrix based on image size """

    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        print('-' * 120 + "\nMonoloco: Ground-truth file opened\n")
    except FileNotFoundError:
        print('-' * 120 + "\nMonoloco: ground-truth file not found\n")
        dic_names = {}

    try:
        kk = dic_names[name]['K']
        dic_gt = dic_names[name]
        print("Monoloco: matched ground-truth file!\n" + '-' * 120)
    except KeyError:
        dic_gt = None
        x_factor = image.size[0] / 1600
        y_factor = image.size[1] / 900
        pixel_factor = (x_factor + y_factor) / 2
        if image.size[0] / image.size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
        else:
            kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
                  [0, 1266.4 * pixel_factor, 491.5 * y_factor],
                  [0., 0., 1.]]  # nuScenes calibration

        print("Ground-truth annotations for the image not found\n"
              "Using a standard calibration matrix...\n" + '-' * 120)

    return kk, dic_gt


def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=None, kk=None):
    """Output json files or images according to the choice"""

    # Save json file
    if 'pifpaf' in args.networks:

        keypoint_sets, pifpaf_out, scores = pifpaf_outputs[:]

        # Visualizer
        keypoint_painter = show.KeypointPainter(show_box=True)
        skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
                                                markersize=1, linewidth=4)

        if 'json' in args.output_types and keypoint_sets.size > 0:
            with open(output_path + '.pifpaf.json', 'w') as f:
                json.dump(pifpaf_out, f)

        if 'keypoints' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.keypoints.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                keypoint_painter.keypoints(ax, keypoint_sets)

        if 'skeleton' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.skeleton.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)

    if 'monoloco' in args.networks:
        if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):

            epistemic = False
            if args.n_dropout > 0:
                epistemic = True

            printer = Printer(images_outputs[1], output_path, monoloco_outputs, kk, output_types=args.output_types,
                              show=args.show, z_max=args.z_max, epistemic=epistemic)
            printer.print()

        if 'json' in args.output_types:
            with open(os.path.join(args.output_path + '.monoloco.json'), 'w') as ff:
                json.dump(monoloco_outputs, ff)
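A usage sketch for factory_for_gt (assumed call site; paths are illustrative placeholders): it takes the PIL image plus an optional basename and ground-truth path, and returns the calibration matrix together with the matched ground-truth dictionary, or None when there is no match:

from PIL import Image
from predict.factory import factory_for_gt

with open('docs/002282.png', 'rb') as f:  # illustrative image path
    pil_image = Image.open(f).convert('RGB')

# path_gt is a placeholder; a miss falls back to a standard calibration matrix
kk, dic_gt = factory_for_gt(pil_image, name='002282.png', path_gt='data/names.json')
# kk: 3x3 intrinsic matrix; dic_gt carries 'boxes' and 'dds' when the image is matched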
@@ -1,65 +1,51 @@
"""
From a json file output images and json annotations
Monoloco predictor. It receives pifpaf joints and outputs distances
"""

import sys
from collections import defaultdict
import os
import json
import logging
import time

import numpy as np
import torch
from PIL import Image

from models.architectures import LinearModel
from visuals.printer import Printer
from utils.camera import get_depth
from utils.misc import laplace_sampling, get_idx_max
from utils.normalize import unnormalize_bi
from utils.pifpaf import get_input_data


class PredictMonoLoco:
class MonoLoco:

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    output_size = 2
    input_size = 17 * 2
    OUTPUT_SIZE = 2
    INPUT_SIZE = 17 * 2
    LINEAR_SIZE = 256
    IOU_MIN = 0.25
    N_SAMPLES = 100

    def __init__(self, boxes, keypoints, image_path, output_path, args):
        self.boxes = boxes
        self.keypoints = keypoints
        self.image_path = image_path
        self.output_path = output_path
        self.device = args.device
        self.draw_kps = args.draw_kps
        self.z_max = args.z_max
        self.output_types = args.output_types
        self.path_gt = args.path_gt
        self.show = args.show
        self.n_samples = 100
        self.n_dropout = args.n_dropout
    def __init__(self, model, device, n_dropout=0):

        self.device = device
        self.n_dropout = n_dropout
        if self.n_dropout > 0:
            self.epistemic = True
        else:
            self.epistemic = False
        self.iou_min = 0.25

        # load the model parameters
        self.model = LinearModel(input_size=self.input_size, output_size=self.output_size, linear_size=args.hidden_size)
        self.model.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
        self.model = LinearModel(input_size=self.INPUT_SIZE, output_size=self.OUTPUT_SIZE, linear_size=self.LINEAR_SIZE)
        self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))
        self.model.eval()  # Default is train
        self.model.to(self.device)

    def run(self):
        # Extract calibration matrix if ground-truth file is present or use a default one
        cnt = 0
        dic_names, kk = factory_for_gt(self.path_gt, self.image_path)
    def forward(self, boxes, keypoints, kk, dic_gt=None):

        (inputs_norm, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = \
            get_input_data(self.boxes, self.keypoints, kk, left_to_right=True)
            get_input_data(boxes, keypoints, kk, left_to_right=True)

        # Conversion into torch tensor
        if inputs_norm:
@@ -76,7 +62,7 @@ class PredictMonoLoco:
            for _ in range(self.n_dropout):
                outputs = self.model(inputs)
                outputs = unnormalize_bi(outputs)
                samples = laplace_sampling(outputs, self.n_samples)
                samples = laplace_sampling(outputs, self.N_SAMPLES)
                total_outputs = torch.cat((total_outputs, samples), 0)
            varss = total_outputs.std(0)
        else:
@@ -92,11 +78,10 @@ class PredictMonoLoco:
                  .format(self.n_dropout, (end-start) * 1000))
            print("Single forward pass time = {:.2f} ms".format((end - start_single) * 1000))

        # Print image and save json
        # Create output files
        dic_out = defaultdict(list)
        if dic_names:
            name = os.path.basename(self.image_path)
            boxes_gt, dds_gt = dic_names[name]['boxes'], dic_names[name]['dds']
        if dic_gt:
            boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds']

        for idx, box in enumerate(uv_boxes):
            dd_pred = float(outputs[idx][0])
@@ -104,9 +89,9 @@ class PredictMonoLoco:
            var_y = float(varss[idx])

            # Find the corresponding ground truth if available
            if dic_names:
            if dic_gt:
                idx_max, iou_max = get_idx_max(box, boxes_gt)
                if iou_max > self.iou_min:
                if iou_max > self.IOU_MIN:
                    dd_real = dds_gt[idx_max]
                    boxes_gt.pop(idx_max)
                    dds_gt.pop(idx_max)
@@ -132,42 +117,4 @@ class PredictMonoLoco:
            dic_out['uv_centers'].append(uv_center)
            dic_out['uv_shoulders'].append(uv_shoulders[idx])

        if any((xx in self.output_types for xx in ['front', 'bird', 'combined'])):
            printer = Printer(self.image_path, self.output_path, dic_out, kk, output_types=self.output_types,
                              show=self.show, z_max=self.z_max, epistemic=self.epistemic)
            printer.print()

        if 'json' in self.output_types:
            with open(os.path.join(self.output_path + '.monoloco.json'), 'w') as ff:
                json.dump(dic_out, ff)

        sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')


def factory_for_gt(path_gt, image_path):
    """Look for ground-truth annotations file and define calibration matrix based on image size """

    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        print('-' * 120 + "\nMonoloco: Ground-truth file opened\n")
    except FileNotFoundError:
        print('-' * 120 + "\nMonoloco: ground-truth file not found\n")
        dic_names = {}

    try:
        name = os.path.basename(image_path)
        kk = dic_names[name]['K']
        print("Monoloco: matched ground-truth file!\n" + '-' * 120)
    except KeyError:
        dic_names = None
        with open(image_path, 'rb') as f:
            im = Image.open(f)
        if im.size[0] / im.size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
        else:
            kk = [[1266.4, 0., 816.27], [0, 1266.4, 491.5], [0., 0., 1.]]  # Nuscenes calibration
        print("Ground-truth annotations for the image not found\n"
              "Using a standard calibration matrix...\n" + '-' * 120)

    return dic_names, kk
        return dic_out
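A driver sketch for the refactored class (assumed wiring that mirrors predict.py below; the checkpoint path is a placeholder, and pifpaf_out, im_size and kk come from the earlier pifpaf and calibration steps): build MonoLoco once from the checkpoint, then call forward per image:

import torch
from predict.monoloco import MonoLoco
from utils.pifpaf import preprocess_pif

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
monoloco = MonoLoco(model='data/models/monoloco.pkl', device=device, n_dropout=0)  # placeholder path

boxes, keypoints = preprocess_pif(pifpaf_out, im_size)  # pifpaf annotations from a previous step
dic_out = monoloco.forward(boxes, keypoints, kk)        # kk from factory_for_gt
print(len(dic_out['uv_centers']), 'people localized')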
@@ -1,26 +1,26 @@
import glob
import json
import os
import sys

import numpy as np

from openpifpaf.network import nets
from openpifpaf import decoder, show
from openpifpaf import transforms
from predict.predict_monoloco import PredictMonoLoco
from utils.pifpaf import preprocess_pif

import torchvision
import torch

from PIL import Image, ImageFile

from openpifpaf.network import nets
from openpifpaf import decoder
from openpifpaf import transforms
from predict.monoloco import MonoLoco
from predict.factory import factory_for_gt, factory_outputs
from utils.pifpaf import preprocess_pif


class ImageList(torch.utils.data.Dataset):
    """It defines transformations to apply to images and outputs of the dataloader"""
    def __init__(self, image_paths, scale, image_transform=None):
        self.image_paths = image_paths
        self.image_transform = image_transform or transforms.image_transform
        self.image_transform = image_transform or transforms.image_transform  # to_tensor + normalize (from pifpaf)
        self.scale = scale

        # data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2
@@ -37,7 +37,8 @@ class ImageList(torch.utils.data.Dataset):
            (round(self.scale * image.size[1]),
             round(self.scale * image.size[0])),
            interpolation=Image.BICUBIC)
        original_image = torchvision.transforms.functional.to_tensor(image)
        # PIL images are not iterables
        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
        image = self.image_transform(image)

        return image_path, original_image, image
@@ -76,12 +77,16 @@ def factory_from_args(args):

def predict(args):

    cnt = 0
    factory_from_args(args)

    # load model
    model, _ = nets.factory_from_args(args)
    model = model.to(args.device)
    processor = decoder.factory_from_args(args, model)
    # load pifpaf model
    model_pifpaf, _ = nets.factory_from_args(args)
    model_pifpaf = model_pifpaf.to(args.device)
    processor = decoder.factory_from_args(args, model_pifpaf)

    # load monoloco
    monoloco = MonoLoco(model=args.model, device=args.device, n_dropout=args.n_dropout)

    # data
    data = ImageList(args.images, scale=args.scale)
@@ -89,11 +94,6 @@ def predict(args):
        data, batch_size=1, shuffle=False,
        pin_memory=args.pin_memory, num_workers=args.loader_workers)

    # Visualizer
    keypoint_painter = show.KeypointPainter(show_box=True)
    skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
                                            markersize=1, linewidth=4)

    keypoints_whole = []
    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
        images = image_tensors.permute(0, 2, 3, 1)
@@ -121,45 +121,42 @@ def predict(args):
        # Correct to not change the confidence
        scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)

        if keypoint_sets.size > 0:
            keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
                                   .reshape(keypoint_sets.shape[0], -1).tolist())

        pifpaf_out = [
            {'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
             'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
                      np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
            for kps in keypoint_sets
        ]

        # Save json file
        if 'pifpaf' in args.networks:

            if 'json' in args.output_types and keypoint_sets.size > 0:
                with open(output_path + '.pifpaf.json', 'w') as f:
                    json.dump(pifpaf_out, f)

        if keypoint_sets.size > 0:
            keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
                                   .reshape(keypoint_sets.shape[0], -1).tolist())

            if 'keypoints' in args.output_types:
                with show.image_canvas(image,
                                       output_path + '.keypoints.png',
                                       show=args.show,
                                       fig_width=args.figure_width,
                                       dpi_factor=args.dpi_factor) as ax:
                    keypoint_painter.keypoints(ax, keypoint_sets)

            if 'skeleton' in args.output_types:
                with show.image_canvas(image,
                                       output_path + '.skeleton.png',
                                       show=args.show,
                                       fig_width=args.figure_width,
                                       dpi_factor=args.dpi_factor) as ax:
                    skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
        pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
        images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image

        if 'monoloco' in args.networks:
            im_size = (float(image.size()[1] / args.scale),
                       float(image.size()[0] / args.scale))  # Width, Height (original)
            boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
            predict_monoloco = PredictMonoLoco(boxes, keypoints, image_path, output_path, args)
            predict_monoloco.run()

            # Extract calibration matrix and ground truth file if present

            with open(image_path, 'rb') as f:
                pil_image = Image.open(f).convert('RGB')
            images_outputs.append(pil_image)

            im_name = os.path.basename(image_path)

            kk, _ = factory_for_gt(image, name=im_name, path_gt=args.path_gt)

            # Preprocess pifpaf outputs and run monoloco
            boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
            monoloco_outputs = monoloco.forward(boxes, keypoints, kk)
        else:
            monoloco_outputs = None
            kk = None

        factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=monoloco_outputs, kk=kk)
        sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')
        cnt += 1
    return keypoints_whole
@@ -193,3 +193,15 @@ def get_depth(uv_center, kk, dd):

    xyz = pixel_to_camera(uv_center_np, kk, zz).tolist()
    return xyz


def get_depth_from_distance(outputs, xy_centers):

    list_zzs = []
    for idx, _ in enumerate(outputs):
        dd = float(outputs[idx][0])
        xx_1 = float(xy_centers[idx][0])
        yy_1 = float(xy_centers[idx][1])
        zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2)
        list_zzs.append(zz)
    return list_zzs
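A quick numerical check of the conversion (my numbers, not from the commit): the predicted distance is measured along the viewing ray, so for dd = 10 m through the normalized image point (0.3, 0.1) the depth component is 10 / sqrt(1 + 0.3**2 + 0.1**2) ≈ 9.53 m:

import math

dd = 10.0              # predicted distance along the ray [m]
xx_1, yy_1 = 0.3, 0.1  # normalized image coordinates of the center
zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2)
print(round(zz, 2))    # 9.53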
@@ -60,14 +60,24 @@ def get_calibration(path_txt):
    p2_list = [float(xx) for xx in p2_str]
    p2 = np.array(p2_list).reshape(3, 4)

    kk = p2[:, :-1]
    p3_str = file[3].split()[1:]
    p3_list = [float(xx) for xx in p3_str]
    p3 = np.array(p3_list).reshape(3, 4)

    kk, tt = get_translation(p2)
    kk_right, tt_right = get_translation(p3)

    return [kk, tt], [kk_right, tt_right]


def get_translation(pp):
    """Separate intrinsic matrix from translation"""

    kk = pp[:, :-1]
    f_x = kk[0, 0]
    f_y = kk[1, 1]
    x0 = kk[2, 0]
    y0 = kk[2, 1]
    aa = p2[0, 3]
    bb = p2[1, 3]
    t3 = p2[2, 3]
    x0, y0 = kk[2, 0:2]
    aa, bb, t3 = pp[0:3, 3]
    t1 = (aa - x0*t3) / f_x
    t2 = (bb - y0*t3) / f_y
    tt = np.array([t1, t2, t3]).reshape(3, 1)
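For reference (standard projective geometry, not part of the diff): KITTI's P2/P3 entries are 3x4 matrices of the form P = K [I | t], so the last column equals K t and the translation follows as t3 = P[2, 3], t1 = (P[0, 3] - x0*t3) / f_x, t2 = (P[1, 3] - y0*t3) / f_y, with (x0, y0) the principal point, which is the formula used above. A small numpy check with made-up values:

import numpy as np

kk = np.array([[721.5, 0., 609.6],
               [0., 721.5, 172.9],
               [0., 0., 1.]])                  # illustrative intrinsics
tt = np.array([0.06, 0., 0.])                  # illustrative translation [m]
pp = np.hstack((kk, (kk @ tt).reshape(3, 1)))  # P = K [I | t]

t3 = pp[2, 3]
t1 = (pp[0, 3] - kk[0, 2] * t3) / kk[0, 0]
t2 = (pp[1, 3] - kk[1, 2] * t3) / kk[1, 1]
print(np.allclose([t1, t2, t3], tt))           # True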
@@ -102,6 +112,7 @@ def check_conditions(line, mode, thresh=0.5):
        check = True

    elif mode == 'gt':
        # if line[:10] == 'Pedestrian' or line[:10] == 'Person_sit':
        if line[:10] == 'Pedestrian':
            check = True


@@ -204,4 +204,3 @@ def append_cluster(dic_jo, phase, xx, dd, kps):
    dic_jo[phase]['clst']['>30']['X'].append(xx)
    dic_jo[phase]['clst']['>30']['Y'].append([dd])
src/utils/stereo.py (new file, 49 lines)
@@ -0,0 +1,49 @@
import copy
import numpy as np


def depth_from_disparity(zzs, zzs_right, kps, kps_right):
    """Associate instances in left and right images and compute disparity"""

    zzs_stereo = []
    cnt = 0
    for idx, zz in enumerate(zzs):

        # Find the closest human in terms of distance
        zz_stereo, idx_min, delta_d_min = calculate_disparity(zz, zzs_right, kps[idx], kps_right)
        if delta_d_min < 1:
            zzs_stereo.append(zz_stereo)
            zzs_right.pop(idx_min)
            kps_right.pop(idx_min)
            cnt += 1
        else:
            zzs_stereo.append(zz)

    return zzs_stereo, cnt


def calculate_disparity(zz, zzs_right, kp, kps_right):
    """From 2 sets of keypoints calculate disparity as the median of the disparities"""

    kp = np.array(copy.deepcopy(kp))
    kps_right = np.array(copy.deepcopy(kps_right))
    zz_stereo = 0
    idx_min = 0
    delta_z_min = 4

    for idx, zz_right in enumerate(zzs_right):
        delta_z = abs(zz - zz_right)
        diffs = np.array(np.array(kp[0] - kps_right[idx][0]))
        diff = np.mean(diffs)

        # Check only for right instances (5 pxls = 80 meters)
        if delta_z < delta_z_min and diff > 5:
            delta_z_min = delta_z
            idx_min = idx
            zzs = 0.54 * 721 / diffs
            zz_stereo = np.median(zzs[kp[2] > 0])

    return zz_stereo, idx_min, delta_z_min
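The hard-coded 0.54 * 721 above is the pinhole stereo relation z = f * B / d, with KITTI's baseline B ≈ 0.54 m and focal length f ≈ 721 px (both assumed constants in this sketch). A rough scale check, which also explains the 5-pixel gate in the comment above:

baseline = 0.54  # KITTI stereo baseline [m]
focal = 721.0    # approximate KITTI focal length [px]

for disparity in (5.0, 10.0, 50.0):
    depth = baseline * focal / disparity
    print('disparity %5.1f px -> depth %6.2f m' % (disparity, depth))
# 5.0 px -> ~77.87 m: disparities below ~5 px correspond to ranges beyond ~80 m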
@@ -9,17 +9,23 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.patches import Ellipse, Circle
import cv2
from collections import OrderedDict
from PIL import Image
from utils.camera import pixel_to_camera


class Printer:
    """
    Print results on images: birds eye view and computed distance
    """
    RADIUS_KPS = 6
    FONTSIZE_BV = 16
    FONTSIZE = 18
    TEXTCOLOR = 'darkorange'
    COLOR_KPS = 'yellow'

    def __init__(self, image_path, output_path, dic_ann, kk, output_types, show=False,
    def __init__(self, image, output_path, dic_ann, kk, output_types, show=False,
                 draw_kps=False, text=True, legend=True, epistemic=False, z_max=30, fig_width=10):

        self.im = image
        self.kk = kk
        self.output_types = output_types
        self.show = show
@@ -27,13 +33,9 @@ class Printer:
        self.text = text
        self.epistemic = epistemic
        self.legend = legend
        self.z_max = z_max  # To include ellipses in the image
        self.z_max = z_max  # To include ellipses in the image
        self.fig_width = fig_width

        from utils.camera import pixel_to_camera, get_depth
        self.pixel_to_camera = pixel_to_camera
        self.get_depth = get_depth

        # Define the output dir
        self.path_out = output_path

@@ -52,12 +54,10 @@ class Printer:
        self.uv_shoulders = dic_ann['uv_shoulders']
        self.uv_kps = dic_ann['uv_kps']

        # Load the image
        with open(image_path, 'rb') as f:
            self.im = Image.open(f).convert('RGB')

        self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1])
        self.ww = self.im.size[0]
        self.hh = self.im.size[1]
        self.radius = 14 / 1600 * self.ww

    def print(self):
        """
@@ -66,22 +66,15 @@ class Printer:
        Either front and/or bird visualization or combined one
        """
        # Parameters
        radius = 14
        radius_kps = 6
        fontsize_bv = 16
        fontsize = 18
        textcolor = 'darkorange'
        color_kps = 'yellow'

        # Resize image for aesthetic proportions in combined visualization
        if 'combined' in self.output_types:
            ww = self.im.size[0]
            hh = self.im.size[1]
            y_scale = ww / (hh * 1.8)  # Defined proportion
            self.im = self.im.resize((ww, round(hh * y_scale)))
            print(y_scale)
            width = self.fig_width + 0.6 * self.fig_width
            height = self.fig_width * self.im.size[1] / self.im.size[0]
            y_scale = self.ww / (self.hh * 1.8)  # Defined proportion
            self.im = self.im.resize((self.ww, round(self.hh * y_scale)))
            self.ww = self.im.size[0]
            self.hh = self.im.size[1]
            fig_width = self.fig_width + 0.6 * self.fig_width
            fig_height = self.fig_width * self.hh / self.ww

            # Distinguish between KITTI images and general images
            if y_scale > 1.7:
@@ -92,7 +85,7 @@ class Printer:
            ext = '.combined.png'

            fig, (ax1, ax0) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [1, width_ratio]},
                                           figsize=(width, height))
                                           figsize=(fig_width, fig_height))
            ax1.set_aspect(fig_ar_1)
            fig.set_tight_layout(True)
            fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -104,7 +97,7 @@ class Printer:
        elif 'front' in self.output_types:
            y_scale = 1
            width = self.fig_width
            height = self.fig_width * self.im.size[1] / self.im.size[0]
            height = self.fig_width * self.hh / self.ww

            plt.figure(0)
            fig0, ax0 = plt.subplots(1, 1, figsize=(width, height))
@@ -114,8 +107,8 @@ class Printer:
        if any(xx in self.output_types for xx in ['front', 'combined']):

            ax0.set_axis_off()
            ax0.set_xlim(0, self.im.size[0])
            ax0.set_ylim(self.im.size[1], 0)
            ax0.set_xlim(0, self.ww)
            ax0.set_ylim(self.hh, 0)
            ax0.imshow(self.im)
            z_min = 0
            bar_ticks = self.z_max // 5 + 1
@@ -125,16 +118,16 @@ class Printer:
        for idx, uv in enumerate(self.uv_shoulders):

            if self.draw_kps:
                ax0 = self.show_kps(ax0, self.uv_kps[idx], y_scale, radius_kps, color_kps)
                ax0 = self.show_kps(ax0, self.uv_kps[idx], y_scale, self.RADIUS_KPS, self.COLOR_KPS)

            elif min(self.zz_pred[idx], self.zz_gt[idx]) > 0:
                color = cmap((self.zz_pred[idx] % self.z_max) / self.z_max)
                circle = Circle((uv[0], uv[1] * y_scale), radius=radius, color=color, fill=True)
                circle = Circle((uv[0], uv[1] * y_scale), radius=self.radius, color=color, fill=True)
                ax0.add_patch(circle)

                if self.text:
                    ax0.text(uv[0]+radius, uv[1] * y_scale - radius, str(num),
                             fontsize=fontsize, color=textcolor, weight='bold')
                    ax0.text(uv[0]+self.radius, uv[1] * y_scale - self.radius, str(num),
                             fontsize=self.FONTSIZE, color=self.TEXTCOLOR, weight='bold')
                num += 1

        ax0.get_xaxis().set_visible(False)
@@ -166,7 +159,7 @@ class Printer:
        # Create bird or combine it with front)
        if any(xx in self.output_types for xx in ['bird', 'combined']):
            uv_max = np.array([0, self.hh, 1])
            xyz_max = self.pixel_to_camera(uv_max, self.kk, self.z_max)
            xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max)
            x_max = abs(xyz_max[0])  # shortcut to avoid oval circles in case of different kk

            for idx, _ in enumerate(self.xx_gt):
@@ -189,8 +182,8 @@ class Printer:
                                      height=1, angle=angle, color='b', fill=False, label="Aleatoric Uncertainty",
                                      linewidth=1.3)
                ellipse_var = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale_epi[idx] * 2,
                                      height=1, angle=angle, color='r', fill=False, label="Uncertainty", linewidth=1,
                                      linestyle='--')
                                      height=1, angle=angle, color='r', fill=False, label="Uncertainty",
                                      linewidth=1, linestyle='--')

                ax1.add_patch(ellipse_ale)
                if self.epistemic:
@@ -203,7 +196,7 @@ class Printer:
            (_, x_pos), (_, z_pos) = get_confidence(self.xx_pred[idx], self.zz_pred[idx], self.stds_ale_epi[idx])

            if self.text:
                ax1.text(x_pos, z_pos, str(num), fontsize=fontsize_bv, color='darkorange')
                ax1.text(x_pos, z_pos, str(num), fontsize=self.FONTSIZE_BV, color='darkorange')
            num += 1

        # To avoid repetitions in the legend
@@ -219,6 +212,10 @@ class Printer:
        ax1.set_xlabel("X [m]")
        ax1.set_ylabel("Z [m]")

        # To remove axis numbers
        # plt.setp([ax1.get_yticklabels() for aa in fig.axes[:-1]], visible=False)
        # plt.setp([ax1.get_xticklabels() for aa in fig.axes[:-1]], visible=False)

        if self.show:
            plt.show()
        else:
@@ -227,9 +224,7 @@ class Printer:
        if self.draw_kps:
            im = cv2.imread(self.path_out + ext)
            im = self.increase_brightness(im, value=30)
            hh = im.size[1]
            ww = im.size[0]
            im_new = im[0:hh, 0:round(ww/1.7)]
            im_new = im[0 : self.hh, 0:round(self.ww / 1.7)]
            cv2.imwrite(self.path_out, im_new)

        plt.close('all')
@@ -243,7 +238,8 @@ class Printer:

        return ax0

    def increase_brightness(self, img, value=30):
    @staticmethod
    def increase_brightness(img, value=30):
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
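For intuition on the combined-view proportions (my example, not from the commit): a 1242x375 KITTI frame gives y_scale = 1242 / (375 * 1.8) ≈ 1.84, above the 1.7 threshold used above to tell wide KITTI frames from more square general images:

for ww, hh in ((1242, 375), (1600, 900)):  # KITTI-sized vs nuScenes-sized frame
    y_scale = ww / (hh * 1.8)
    kind = 'KITTI-like' if y_scale > 1.7 else 'general'
    print('%dx%d -> y_scale %.2f (%s)' % (ww, hh, y_scale, kind))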