diff --git a/docs/002282.png.combined.png b/docs/002282.png.combined.png
new file mode 100644
index 0000000..f2304a0
Binary files /dev/null and b/docs/002282.png.combined.png differ
diff --git a/src/eval/run_kitti.py b/src/eval/run_kitti.py
index cc24318..e096baf 100644
--- a/src/eval/run_kitti.py
+++ b/src/eval/run_kitti.py
@@ -1,16 +1,22 @@
+"""Run monoloco over all the pifpaf joints of KITTI images
+and save the resulting annotations in txt files"""
+
-import torch
 import math
-import numpy as np
 import os
 import glob
 import json
 import logging
+
+import numpy as np
+import torch
+
 from models.architectures import LinearModel
 from utils.misc import laplace_sampling
 from utils.kitti import eval_geometric, get_calibration
 from utils.normalize import unnormalize_bi
 from utils.pifpaf import get_input_data, preprocess_pif
+from utils.camera import get_depth_from_distance


 class RunKitti:
@@ -23,22 +29,17 @@ class RunKitti:
     average_y = 0.48
     n_samples = 100

-    def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout, stereo=False):
+    def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout):

-        # Set directories
-        assert dir_ann, "Annotations folder is required"
         self.dir_ann = dir_ann
         self.n_dropout = n_dropout
-
-        list_ann = glob.glob(os.path.join(dir_ann, '*.json'))
         self.dir_kk = os.path.join('data', 'kitti', 'calib')
         self.dir_out = os.path.join('data', 'kitti', 'monoloco')
         if not os.path.exists(self.dir_out):
             os.makedirs(self.dir_out)
             print("Created output directory for txt files")
-        self.list_basename = [os.path.basename(x).split('.')[0] for x in list_ann]
-        assert self.list_basename, " Missing json annotations file to create txt files for KITTI datasets"
+        self.list_basename = factory_basename(dir_ann)

         # Load the model
         input_size = 17 * 2
@@ -54,86 +55,130 @@ class RunKitti:

         # Run inference
         for basename in self.list_basename:
-            path_calib = os.path.join(self.dir_kk, basename + '.txt')
-            kk, tt = get_calibration(path_calib)
-            path_ann = os.path.join(self.dir_ann, basename + '.png.pifpaf.json')
-            with open(path_ann, 'r') as f:
-                annotations = json.load(f)
+            path_calib = os.path.join(self.dir_kk, basename + '.txt')
+
+            annotations, kk, tt, _ = factory_file(path_calib, self.dir_ann, basename)

             boxes, keypoints = preprocess_pif(annotations)
             (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = get_input_data(boxes, keypoints, kk)
             dds_geom, xy_centers = eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48)

+            # Update counting
             self.cnt_ann += len(boxes)
-
-            inputs = torch.from_numpy(np.array(inputs)).float().to(self.device)
-
-            if len(inputs) == 0:
+            if not inputs:
                 self.cnt_no_file += 1
-
             else:
                 self.cnt_file += 1
-                if self.n_dropout > 0:
-                    total_outputs = torch.empty((0, len(uv_boxes))).to(self.device)
-                    self.model.dropout.training = True
-                    for ii in range(self.n_dropout):
-                        outputs = self.model(inputs)
-                        outputs = unnormalize_bi(outputs)
-                        samples = laplace_sampling(outputs, self.n_samples)
-                        total_outputs = torch.cat((total_outputs, samples), 0)
-                    varss = total_outputs.std(0)

+            # Run the model
+            inputs = torch.from_numpy(np.array(inputs)).float().to(self.device)
+            if self.n_dropout > 0:
+                total_outputs = torch.empty((0, len(uv_boxes))).to(self.device)
+                self.model.dropout.training = True
+                for _ in range(self.n_dropout):
+                    outputs = self.model(inputs)
+                    outputs = unnormalize_bi(outputs)
+                    samples = laplace_sampling(outputs, self.n_samples)
+                    total_outputs = torch.cat((total_outputs, samples), 0)
+                varss = total_outputs.std(0)
-
-                else:
-                    varss = [0]*len(uv_boxes)
+            else:
+                varss = [0]*len(uv_boxes)

-                # Don't use dropout for the mean prediction and aleatoric uncertainty
-                self.model.dropout.training = False
-                outputs_net = self.model(inputs)
-                outputs = outputs_net.cpu().detach().numpy()
+            # Don't use dropout for the mean prediction and aleatoric uncertainty
+            self.model.dropout.training = False
+            outputs_net = self.model(inputs)
+            outputs = outputs_net.cpu().detach().numpy()

-                path_txt = os.path.join(self.dir_out, basename + '.txt')
-                with open(path_txt, "w+") as ff:
-                    for idx in range(outputs.shape[0]):
-                        xx_1 = float(xy_centers[idx][0])
-                        yy_1 = float(xy_centers[idx][1])
-                        xy_kp = xy_kps[idx]
-                        dd = float(outputs[idx][0])
-                        std_ale = math.exp(float(outputs[idx][1])) * dd
+            list_zzs = get_depth_from_distance(outputs, xy_centers)
+            all_outputs = [outputs, varss, dds_geom]
+            all_inputs = [uv_boxes, xy_centers, xy_kps]
+            all_params = [kk, tt]

-                        zz = dd / math.sqrt(1 + xx_1**2 + yy_1**2)
-                        xx_cam_0 = xx_1*zz + tt[0]  # Still to verify the sign but negligible
-                        yy_cam_0 = yy_1*zz + tt[1]
-                        zz_cam_0 = zz + tt[2]
-                        dd_cam_0 = math.sqrt(xx_cam_0**2 + yy_cam_0**2 + zz_cam_0**2)
-
-                        uv_box = uv_boxes[idx]
-
-                        twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3],
-                                                              xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0,
-                                                              std_ale, varss[idx], uv_box[4], dds_geom[idx]]]
-
-                        keypoints_str = ["%.5f" % vv for vv in xy_kp]
-                        for item in twodecimals:
-                            ff.write("%s " % item)
-                        for item in keypoints_str:
-                            ff.write("%s " % item)
-                        ff.write("\n")
-
-                    # Save intrinsic matrix in the last row
-                    kk_list = kk.reshape(-1,).tolist()
-                    for kk_el in kk_list:
-                        ff.write("%f " % kk_el)
-                    ff.write("\n")
+            # Save the file
+            all_outputs.append(list_zzs)
+            path_txt = os.path.join(self.dir_out, basename + '.txt')
+            save_txts(path_txt, all_inputs, all_outputs, all_params)

         # Print statistics
         print("Saved in {} txt {} annotations. Not found {} images"
               .format(self.cnt_file, self.cnt_ann, self.cnt_no_file))
-
+
+
+def save_txts(path_txt, all_inputs, all_outputs, all_params):
+
+    outputs, varss, dds_geom, zzs = all_outputs[:]
+    uv_boxes, xy_centers, xy_kps = all_inputs[:]
+    kk, tt = all_params[:]
+
+    with open(path_txt, "w+") as ff:
+        for idx in range(outputs.shape[0]):
+            xx_1 = float(xy_centers[idx][0])
+            yy_1 = float(xy_centers[idx][1])
+            xy_kp = xy_kps[idx]
+            dd = float(outputs[idx][0])
+            std_ale = math.exp(float(outputs[idx][1])) * dd
+            zz = zzs[idx]
+            xx_cam_0 = xx_1 * zz + tt[0]
+            yy_cam_0 = yy_1 * zz + tt[1]
+            zz_cam_0 = zz + tt[2]
+            dd_cam_0 = math.sqrt(xx_cam_0 ** 2 + yy_cam_0 ** 2 + zz_cam_0 ** 2)
+
+            uv_box = uv_boxes[idx]
+
+            twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3],
+                                                  xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0,
+                                                  std_ale, varss[idx], uv_box[4], dds_geom[idx]]]
+
+            keypoints_str = ["%.5f" % vv for vv in xy_kp]
+            for item in twodecimals:
+                ff.write("%s " % item)
+            for item in keypoints_str:
+                ff.write("%s " % item)
+            ff.write("\n")
+
+        # Save intrinsic matrix in the last row
+        kk_list = kk.reshape(-1).tolist()
+        for kk_el in kk_list:
+            ff.write("%f " % kk_el)
+        ff.write("\n")
+
+
+def factory_basename(dir_ann):
+    """Return all the basenames in the annotations folder"""
+
+    list_ann = glob.glob(os.path.join(dir_ann, '*.json'))
+    list_basename = [os.path.basename(x).split('.')[0] for x in list_ann]
+    assert list_basename, "Missing json annotations files to create txt files for the KITTI dataset"
+
+    return list_basename
+
+
+def factory_file(path_calib, dir_ann, basename, ite=0):
+    """Choose the annotation and the calibration files. Stereo option with ite = 1"""
+
+    stereo_file = True
+    p_left, p_right = get_calibration(path_calib)
+
+    if ite == 0:
+        kk, tt = p_left[:]
+        path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json')
+    else:
+        kk, tt = p_right[:]
+        path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json')
+
+    try:
+        with open(path_ann, 'r') as f:
+            annotations = json.load(f)
+    except FileNotFoundError:
+        annotations = None
+        if ite == 1:
+            stereo_file = False
+
+    return annotations, kk, tt, stereo_file
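Note on the txt layout produced by save_txts above: each detection row carries 12 "%.3f" values (the four box corners, the camera-frame x, y, z and distance, the aleatoric and epistemic uncertainties, the box confidence and the geometric distance), followed by 34 "%.5f" normalized keypoint values, and the flattened 3x3 intrinsic matrix is appended as the last row. A minimal sketch of a reader for this layout (read_monoloco_txt is a hypothetical helper, not part of this patch):

import numpy as np

def read_monoloco_txt(path_txt):
    """Parse one txt file written by save_txts (sketch, assuming the row layout above)"""
    with open(path_txt, 'r') as ff:
        lines = [ll.split() for ll in ff.read().splitlines() if ll.strip()]
    kk = np.array([float(xx) for xx in lines[-1]]).reshape(3, 3)  # last row: intrinsics
    rows = []
    for line in lines[:-1]:
        vals = [float(xx) for xx in line]
        rows.append({'uv_box': vals[0:4],      # x1, y1, x2, y2
                     'xyzd': vals[4:8],        # camera-frame coordinates and distance
                     'std_ale': vals[8], 'vars': vals[9],
                     'conf': vals[10], 'dd_geom': vals[11],
                     'xy_kps': vals[12:]})     # 34 normalized keypoint values
    return rows, kk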
diff --git a/src/features/preprocess_ki.py b/src/features/preprocess_ki.py
index ce0d99c..18eb932 100644
--- a/src/features/preprocess_ki.py
+++ b/src/features/preprocess_ki.py
@@ -64,7 +64,8 @@ class PreprocessKitti:

             # Extract keypoints
             path_txt = os.path.join(self.dir_kk, basename + '.txt')
-            kk, tt = get_calibration(path_txt)
+            p_left, _ = get_calibration(path_txt)
+            kk = p_left[0]

             # Iterate over each line of the gt file and save box location and distances
             boxes_gt, dds_gt, _, _ = parse_ground_truth(path_gt)
diff --git a/src/main.py b/src/main.py
index e5c8640..a20a070 100644
--- a/src/main.py
+++ b/src/main.py
@@ -9,7 +9,7 @@ from openpifpaf.network import nets
 from openpifpaf import decoder
 from features.preprocess_nu import PreprocessNuscenes
 from features.preprocess_ki import PreprocessKitti
-from predict.predict_2d_3d import predict
+from predict.predict import predict
 from models.trainer import Trainer
 from eval.run_kitti import RunKitti
 from eval.geom_baseline import GeomBaseline
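For callers migrating to the new get_calibration signature (see the utils/kitti.py hunk further down), the function now returns one [intrinsics, translation] pair per camera instead of a single kk. A short usage sketch, with a hypothetical calibration path:

from utils.kitti import get_calibration

path_txt = 'data/kitti/calib/000123.txt'  # hypothetical calibration file
p_left, p_right = get_calibration(path_txt)
kk, tt = p_left               # 3x3 intrinsics and 3x1 translation of the left camera (P2)
kk_right, tt_right = p_right  # same for the right camera (P3)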
diff --git a/src/predict/factory.py b/src/predict/factory.py
new file mode 100644
index 0000000..b764f89
--- /dev/null
+++ b/src/predict/factory.py
@@ -0,0 +1,91 @@
+
+import json
+import os
+
+from PIL import Image
+from openpifpaf import show
+from visuals.printer import Printer
+
+
+def factory_for_gt(image, name=None, path_gt=None):
+    """Look for ground-truth annotations file and define calibration matrix based on image size"""
+
+    try:
+        with open(path_gt, 'r') as f:
+            dic_names = json.load(f)
+        print('-' * 120 + "\nMonoloco: Ground-truth file opened\n")
+    except FileNotFoundError:
+        print('-' * 120 + "\nMonoloco: ground-truth file not found\n")
+        dic_names = {}
+
+    try:
+        kk = dic_names[name]['K']
+        dic_gt = dic_names[name]
+        print("Monoloco: matched ground-truth file!\n" + '-' * 120)
+    except KeyError:
+        dic_gt = None
+        x_factor = image.size[0] / 1600
+        y_factor = image.size[1] / 900
+        pixel_factor = (x_factor + y_factor) / 2
+        if image.size[0] / image.size[1] > 2.5:
+            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # KITTI calibration
+        else:
+            kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
+                  [0., 1266.4 * pixel_factor, 491.5 * y_factor],
+                  [0., 0., 1.]]  # nuScenes calibration
+        print("Ground-truth annotations for the image not found\n"
+              "Using a standard calibration matrix...\n" + '-' * 120)
+
+    return kk, dic_gt
+
+
+def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=None, kk=None):
+    """Output json files or images according to the choice"""
+
+    # Save json file
+    if 'pifpaf' in args.networks:
+
+        keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
+
+        # Visualizer
+        keypoint_painter = show.KeypointPainter(show_box=True)
+        skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
+                                                markersize=1, linewidth=4)
+
+        if 'json' in args.output_types and keypoint_sets.size > 0:
+            with open(output_path + '.pifpaf.json', 'w') as f:
+                json.dump(pifpaf_out, f)
+
+        if 'keypoints' in args.output_types:
+            with show.image_canvas(images_outputs[0],
+                                   output_path + '.keypoints.png',
+                                   show=args.show,
+                                   fig_width=args.figure_width,
+                                   dpi_factor=args.dpi_factor) as ax:
+                keypoint_painter.keypoints(ax, keypoint_sets)
+
+        if 'skeleton' in args.output_types:
+            with show.image_canvas(images_outputs[0],
+                                   output_path + '.skeleton.png',
+                                   show=args.show,
+                                   fig_width=args.figure_width,
+                                   dpi_factor=args.dpi_factor) as ax:
+                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
+
+    if 'monoloco' in args.networks:
+        if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
+
+            epistemic = False
+            if args.n_dropout > 0:
+                epistemic = True
+
+            printer = Printer(images_outputs[1], output_path, monoloco_outputs, kk, output_types=args.output_types,
+                              show=args.show, z_max=args.z_max, epistemic=epistemic)
+            printer.print()
+
+        if 'json' in args.output_types:
+            with open(output_path + '.monoloco.json', 'w') as ff:
+                json.dump(monoloco_outputs, ff)
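A usage sketch of factory_for_gt as predict.py calls it further down; the image and ground-truth paths are hypothetical:

from PIL import Image
from predict.factory import factory_for_gt

with open('docs/002282.png', 'rb') as f:   # hypothetical image path
    pil_image = Image.open(f).convert('RGB')

# Returns the matched calibration and ground-truth entry, or a scaled
# default matrix (KITTI or nuScenes) when the name is not in the json
kk, dic_gt = factory_for_gt(pil_image, name='002282.png', path_gt='data/names.json')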
diff --git a/src/predict/predict_monoloco.py b/src/predict/monoloco.py
similarity index 52%
rename from src/predict/predict_monoloco.py
rename to src/predict/monoloco.py
index ee6b023..4b7b66e 100644
--- a/src/predict/predict_monoloco.py
+++ b/src/predict/monoloco.py
@@ -1,65 +1,51 @@
 """
-From a json file output images and json annotations
+Monoloco predictor. It receives pifpaf joints and outputs distances
 """

-import sys
 from collections import defaultdict
-import os
-import json
 import logging
 import time

 import numpy as np
 import torch
-from PIL import Image

 from models.architectures import LinearModel
-from visuals.printer import Printer
 from utils.camera import get_depth
 from utils.misc import laplace_sampling, get_idx_max
 from utils.normalize import unnormalize_bi
 from utils.pifpaf import get_input_data


-class PredictMonoLoco:
+class MonoLoco:

     logging.basicConfig(level=logging.INFO)
     logger = logging.getLogger(__name__)

-    output_size = 2
-    input_size = 17 * 2
+    OUTPUT_SIZE = 2
+    INPUT_SIZE = 17 * 2
+    LINEAR_SIZE = 256
+    IOU_MIN = 0.25
+    N_SAMPLES = 100

-    def __init__(self, boxes, keypoints, image_path, output_path, args):
-        self.boxes = boxes
-        self.keypoints = keypoints
-        self.image_path = image_path
-        self.output_path = output_path
-        self.device = args.device
-        self.draw_kps = args.draw_kps
-        self.z_max = args.z_max
-        self.output_types = args.output_types
-        self.path_gt = args.path_gt
-        self.show = args.show
-        self.n_samples = 100
-        self.n_dropout = args.n_dropout
+    def __init__(self, model, device, n_dropout=0):
+
+        self.device = device
+        self.n_dropout = n_dropout
         if self.n_dropout > 0:
             self.epistemic = True
         else:
             self.epistemic = False
-        self.iou_min = 0.25

         # load the model parameters
-        self.model = LinearModel(input_size=self.input_size, output_size=self.output_size, linear_size=args.hidden_size)
-        self.model.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
+        self.model = LinearModel(input_size=self.INPUT_SIZE, output_size=self.OUTPUT_SIZE, linear_size=self.LINEAR_SIZE)
+        self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))
         self.model.eval()  # Default is train
         self.model.to(self.device)

-    def run(self):
-        # Extract calibration matrix if ground-truth file is present or use a default one
-        cnt = 0
-        dic_names, kk = factory_for_gt(self.path_gt, self.image_path)
+    def forward(self, boxes, keypoints, kk, dic_gt=None):
+
         (inputs_norm, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = \
-            get_input_data(self.boxes, self.keypoints, kk, left_to_right=True)
+            get_input_data(boxes, keypoints, kk, left_to_right=True)

         # Conversion into torch tensor
         if inputs_norm:
@@ -76,7 +62,7 @@ class PredictMonoLoco:
             for _ in range(self.n_dropout):
                 outputs = self.model(inputs)
                 outputs = unnormalize_bi(outputs)
-                samples = laplace_sampling(outputs, self.n_samples)
+                samples = laplace_sampling(outputs, self.N_SAMPLES)
                 total_outputs = torch.cat((total_outputs, samples), 0)
             varss = total_outputs.std(0)
         else:
@@ -92,11 +78,10 @@ class PredictMonoLoco:
               .format(self.n_dropout, (end-start) * 1000))
         print("Single forward pass time = {:.2f} ms".format((end - start_single) * 1000))

-        # Print image and save json
+        # Create output files
         dic_out = defaultdict(list)
-        if dic_names:
-            name = os.path.basename(self.image_path)
-            boxes_gt, dds_gt = dic_names[name]['boxes'], dic_names[name]['dds']
+        if dic_gt:
+            boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds']

         for idx, box in enumerate(uv_boxes):
             dd_pred = float(outputs[idx][0])
@@ -104,9 +89,9 @@ class PredictMonoLoco:
             var_y = float(varss[idx])

             # Find the corresponding ground truth if available
-            if dic_names:
+            if dic_gt:
                 idx_max, iou_max = get_idx_max(box, boxes_gt)
-                if iou_max > self.iou_min:
+                if iou_max > self.IOU_MIN:
                     dd_real = dds_gt[idx_max]
                     boxes_gt.pop(idx_max)
                     dds_gt.pop(idx_max)
@@ -132,42 +117,4 @@ class PredictMonoLoco:
             dic_out['uv_centers'].append(uv_center)
             dic_out['uv_shoulders'].append(uv_shoulders[idx])

-        if any((xx in self.output_types for xx in ['front', 'bird', 'combined'])):
-            printer = Printer(self.image_path, self.output_path, dic_out, kk, output_types=self.output_types,
-                              show=self.show, z_max=self.z_max, epistemic=self.epistemic)
-            printer.print()
-
-        if 'json' in self.output_types:
-            with open(os.path.join(self.output_path + '.monoloco.json'), 'w') as ff:
-                json.dump(dic_out, ff)
-
-        sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')
-
-
-def factory_for_gt(path_gt, image_path):
-    """Look for ground-truth annotations file and define calibration matrix based on image size """
-
-    try:
-        with open(path_gt, 'r') as f:
-            dic_names = json.load(f)
-        print('-' * 120 + "\nMonoloco: Ground-truth file opened\n")
-    except FileNotFoundError:
-        print('-' * 120 + "\nMonoloco: ground-truth file not found\n")
-        dic_names = {}
-
-    try:
-        name = os.path.basename(image_path)
-        kk = dic_names[name]['K']
-        print("Monoloco: matched ground-truth file!\n" + '-' * 120)
-    except KeyError:
-        dic_names = None
-        with open(image_path, 'rb') as f:
-            im = Image.open(f)
-        if im.size[0] / im.size[1] > 2.5:
-            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
-        else:
-            kk = [[1266.4, 0., 816.27], [0, 1266.4, 491.5], [0., 0., 1.]]  # Nuscenes calibration
-        print("Ground-truth annotations for the image not found\n"
-              "Using a standard calibration matrix...\n" + '-' * 120)
-
-    return dic_names, kk
+        return dic_out
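MonoLoco now separates model loading (once, in __init__) from per-image inference (forward); predict.py below wires the two steps together. A condensed sketch, with a hypothetical weights path and with boxes, keypoints, kk and dic_gt coming from preprocess_pif and factory_for_gt:

import torch
from predict.monoloco import MonoLoco

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
monoloco = MonoLoco(model='data/models/monoloco.pkl', device=device, n_dropout=0)  # hypothetical weights path

# boxes and keypoints from preprocess_pif; kk and dic_gt from factory_for_gt
dic_out = monoloco.forward(boxes, keypoints, kk, dic_gt=dic_gt)  # distances, uncertainties and 2D references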
diff --git a/src/predict/predict_2d_3d.py b/src/predict/predict.py
similarity index 66%
rename from src/predict/predict_2d_3d.py
rename to src/predict/predict.py
index 27169f2..4acb7cd 100644
--- a/src/predict/predict_2d_3d.py
+++ b/src/predict/predict.py
@@ -1,26 +1,26 @@
 import glob
-import json
 import os
+import sys

 import numpy as np
-
-from openpifpaf.network import nets
-from openpifpaf import decoder, show
-from openpifpaf import transforms
-from predict.predict_monoloco import PredictMonoLoco
-from utils.pifpaf import preprocess_pif
-
 import torchvision
 import torch
-
 from PIL import Image, ImageFile

+from openpifpaf.network import nets
+from openpifpaf import decoder
+from openpifpaf import transforms
+from predict.monoloco import MonoLoco
+from predict.factory import factory_for_gt, factory_outputs
+from utils.pifpaf import preprocess_pif
+

 class ImageList(torch.utils.data.Dataset):
+    """It defines transformations to apply to images and outputs of the dataloader"""

     def __init__(self, image_paths, scale, image_transform=None):
         self.image_paths = image_paths
-        self.image_transform = image_transform or transforms.image_transform
+        self.image_transform = image_transform or transforms.image_transform  # to_tensor + normalize (from pifpaf)
         self.scale = scale

     # data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2
@@ -37,7 +37,8 @@ class ImageList(torch.utils.data.Dataset):
                                (round(self.scale * image.size[1]), round(self.scale * image.size[0])),
                                interpolation=Image.BICUBIC)

-        original_image = torchvision.transforms.functional.to_tensor(image)
+        # PIL images are not iterables
+        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
         image = self.image_transform(image)

         return image_path, original_image, image
@@ -76,12 +77,16 @@ def factory_from_args(args):


 def predict(args):
+    cnt = 0

     factory_from_args(args)

-    # load model
-    model, _ = nets.factory_from_args(args)
-    model = model.to(args.device)
-    processor = decoder.factory_from_args(args, model)
+    # load pifpaf model
+    model_pifpaf, _ = nets.factory_from_args(args)
+    model_pifpaf = model_pifpaf.to(args.device)
+    processor = decoder.factory_from_args(args, model_pifpaf)
+
+    # load monoloco
+    monoloco = MonoLoco(model=args.model, device=args.device, n_dropout=args.n_dropout)

     # data
     data = ImageList(args.images, scale=args.scale)
@@ -89,11 +94,6 @@ def predict(args):
         data, batch_size=1, shuffle=False, pin_memory=args.pin_memory,
         num_workers=args.loader_workers)

-    # Visualizer
-    keypoint_painter = show.KeypointPainter(show_box=True)
-    skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
-                                            markersize=1, linewidth=4)
-
     keypoints_whole = []
     for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
         images = image_tensors.permute(0, 2, 3, 1)
@@ -121,45 +121,41 @@ def predict(args):

             # Correct to not change the confidence
             scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
+            if keypoint_sets.size > 0:
+                keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
+                                       .reshape(keypoint_sets.shape[0], -1).tolist())
+
             pifpaf_out = [
                 {'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
                  'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
                           np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
                 for kps in keypoint_sets
            ]
-
-            # Save json file
-            if 'pifpaf' in args.networks:
-
-                if 'json' in args.output_types and keypoint_sets.size > 0:
-                    with open(output_path + '.pifpaf.json', 'w') as f:
-                        json.dump(pifpaf_out, f)
-
-                if keypoint_sets.size > 0:
-                    keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
-                                           .reshape(keypoint_sets.shape[0], -1).tolist())
-
-                if 'keypoints' in args.output_types:
-                    with show.image_canvas(image,
-                                           output_path + '.keypoints.png',
-                                           show=args.show,
-                                           fig_width=args.figure_width,
-                                           dpi_factor=args.dpi_factor) as ax:
-                        keypoint_painter.keypoints(ax, keypoint_sets)
-
-                if 'skeleton' in args.output_types:
-                    with show.image_canvas(image,
-                                           output_path + '.skeleton.png',
-                                           show=args.show,
-                                           fig_width=args.figure_width,
-                                           dpi_factor=args.dpi_factor) as ax:
-                        skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
+            pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoint_sets and scores for pifpaf printing
+            images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image

             if 'monoloco' in args.networks:
                 im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale))  # Width, Height (original)

-                boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
-                predict_monoloco = PredictMonoLoco(boxes, keypoints, image_path, output_path, args)
-                predict_monoloco.run()
+                # Extract calibration matrix and ground-truth entry if present
+                with open(image_path, 'rb') as f:
+                    pil_image = Image.open(f).convert('RGB')
+                images_outputs.append(pil_image)
+
+                im_name = os.path.basename(image_path)
+                kk, dic_gt = factory_for_gt(pil_image, name=im_name, path_gt=args.path_gt)
+
+                # Preprocess pifpaf outputs and run monoloco
+                boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
+                monoloco_outputs = monoloco.forward(boxes, keypoints, kk, dic_gt=dic_gt)
+            else:
+                monoloco_outputs = None
+                kk = None
+
+            factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=monoloco_outputs, kk=kk)
+            sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')
+            cnt += 1

     return keypoints_whole
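A condensed sketch of how ImageList feeds the prediction loop above; the image path is hypothetical:

import torch
from predict.predict import ImageList

data = ImageList(['docs/002282.png'], scale=1.0)  # hypothetical image list
data_loader = torch.utils.data.DataLoader(data, batch_size=1, shuffle=False)

for image_paths, original_images, processed_images in data_loader:
    # original_images are unnormalized tensors in [0, 1]; processed_images carry the pifpaf normalization
    print(image_paths[0], original_images.shape, processed_images.shape)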
diff --git a/src/utils/camera.py b/src/utils/camera.py
index 8844763..9b1824e 100644
--- a/src/utils/camera.py
+++ b/src/utils/camera.py
@@ -193,3 +193,16 @@ def get_depth(uv_center, kk, dd):
     xyz = pixel_to_camera(uv_center_np, kk, zz).tolist()

     return xyz
+
+
+def get_depth_from_distance(outputs, xy_centers):
+    """Convert distances from the camera into depths along z, given the normalized centers"""
+
+    list_zzs = []
+    for idx, _ in enumerate(outputs):
+        dd = float(outputs[idx][0])
+        xx_1 = float(xy_centers[idx][0])
+        yy_1 = float(xy_centers[idx][1])
+        zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2)
+        list_zzs.append(zz)
+    return list_zzs
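get_depth_from_distance inverts d = z * sqrt(1 + x1^2 + y1^2), where (x1, y1) are the normalized image coordinates of the center, to recover the depth z along the optical axis. A quick numeric check with hypothetical values:

import math

dd = 10.0               # predicted distance from the camera center [m]
xx_1, yy_1 = 0.2, 0.1   # normalized image coordinates of the center
zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2)
print(round(zz, 2))     # 9.76: the depth along z is shorter than the ray distance

# Round trip: the distance is recovered from (x, y, z) in camera coordinates
dd_back = math.sqrt((xx_1 * zz) ** 2 + (yy_1 * zz) ** 2 + zz ** 2)
assert abs(dd_back - dd) < 1e-9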
diff --git a/src/utils/kitti.py b/src/utils/kitti.py
index bd48992..426a9ea 100644
--- a/src/utils/kitti.py
+++ b/src/utils/kitti.py
@@ -60,14 +60,24 @@ def get_calibration(path_txt):
     p2_list = [float(xx) for xx in p2_str]
     p2 = np.array(p2_list).reshape(3, 4)

-    kk = p2[:, :-1]
+    p3_str = file[3].split()[1:]
+    p3_list = [float(xx) for xx in p3_str]
+    p3 = np.array(p3_list).reshape(3, 4)
+
+    kk, tt = get_translation(p2)
+    kk_right, tt_right = get_translation(p3)
+
+    return [kk, tt], [kk_right, tt_right]
+
+
+def get_translation(pp):
+    """Separate intrinsic matrix from translation"""
+
+    kk = pp[:, :-1]
     f_x = kk[0, 0]
     f_y = kk[1, 1]
-    x0 = kk[2, 0]
-    y0 = kk[2, 1]
-    aa = p2[0, 3]
-    bb = p2[1, 3]
-    t3 = p2[2, 3]
+    x0, y0 = kk[2, 0:2]
+    aa, bb, t3 = pp[0:3, 3]
     t1 = (aa - x0*t3) / f_x
     t2 = (bb - y0*t3) / f_y
     tt = np.array([t1, t2, t3]).reshape(3, 1)
@@ -102,6 +112,7 @@ def check_conditions(line, mode, thresh=0.5):
             check = True

     elif mode == 'gt':
+        # if line[:10] == 'Pedestrian' or line[:10] == 'Person_sit':
         if line[:10] == 'Pedestrian':
             check = True
diff --git a/src/utils/misc.py b/src/utils/misc.py
index 3f11c96..30e411c 100644
--- a/src/utils/misc.py
+++ b/src/utils/misc.py
@@ -204,4 +204,3 @@ def append_cluster(dic_jo, phase, xx, dd, kps):
         dic_jo[phase]['clst']['>30']['X'].append(xx)
         dic_jo[phase]['clst']['>30']['Y'].append([dd])
-
diff --git a/src/utils/stereo.py b/src/utils/stereo.py
new file mode 100644
index 0000000..e744017
--- /dev/null
+++ b/src/utils/stereo.py
@@ -0,0 +1,47 @@
+
+import copy
+
+import numpy as np
+
+
+def depth_from_disparity(zzs, zzs_right, kps, kps_right):
+    """Associate instances in left and right images and compute disparity"""
+
+    zzs_stereo = []
+    cnt = 0
+    for idx, zz in enumerate(zzs):
+
+        # Find the closest human in terms of distance
+        zz_stereo, idx_min, delta_z_min = calculate_disparity(zz, zzs_right, kps[idx], kps_right)
+        if delta_z_min < 1:
+            zzs_stereo.append(zz_stereo)
+            zzs_right.pop(idx_min)
+            kps_right.pop(idx_min)
+            cnt += 1
+        else:
+            zzs_stereo.append(zz)
+
+    return zzs_stereo, cnt
+
+
+def calculate_disparity(zz, zzs_right, kp, kps_right):
+    """From 2 sets of keypoints calculate disparity as the median of the disparities"""
+
+    kp = np.array(copy.deepcopy(kp))
+    kps_right = np.array(copy.deepcopy(kps_right))
+    zz_stereo = 0
+    idx_min = 0
+    delta_z_min = 4
+
+    for idx, zz_right in enumerate(zzs_right):
+        delta_z = abs(zz - zz_right)
+        diffs = np.array(kp[0] - kps_right[idx][0])
+        diff = np.mean(diffs)
+
+        # Check only for instances to the right, with at least 5 pixels of disparity (~80 meters)
+        if delta_z < delta_z_min and diff > 5:
+            delta_z_min = delta_z
+            idx_min = idx
+            zzs = 0.54 * 721 / diffs
+            zz_stereo = np.median(zzs[kp[2] > 0])
+
+    return zz_stereo, idx_min, delta_z_min
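In calculate_disparity, 0.54 and 721 are the KITTI stereo baseline in meters and the focal length in pixels, so each keypoint pair yields z = f * B / disparity and the median over confident keypoints is kept. A small sketch of the association step on synthetic keypoints (all values hypothetical):

import numpy as np
from utils.stereo import depth_from_disparity

# One person at ~10 m: 17 keypoints as [x_row, y_row, confidence_row]
xs = np.linspace(300.0, 340.0, 17)
kp_left = [xs.tolist(), np.linspace(100.0, 250.0, 17).tolist(), [1.0] * 17]

disparity = 0.54 * 721 / 10.0  # f * B / z, about 38.9 pixels at 10 meters
kp_right = [(xs - disparity).tolist(), kp_left[1], [1.0] * 17]

zzs_stereo, cnt = depth_from_disparity(zzs=[10.4], zzs_right=[10.1],
                                       kps=[kp_left], kps_right=[kp_right])
print(zzs_stereo, cnt)  # ~[10.0], 1: the stereo depth replaces the monocular estimate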
diff --git a/src/visuals/printer.py b/src/visuals/printer.py
index 98805dc..ef24bc9 100644
--- a/src/visuals/printer.py
+++ b/src/visuals/printer.py
@@ -9,17 +9,23 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable
 from matplotlib.patches import Ellipse, Circle
 import cv2
 from collections import OrderedDict
-from PIL import Image
+
+from utils.camera import pixel_to_camera


 class Printer:
     """
     Print results on images: birds eye view and computed distance
     """
+    RADIUS_KPS = 6
+    FONTSIZE_BV = 16
+    FONTSIZE = 18
+    TEXTCOLOR = 'darkorange'
+    COLOR_KPS = 'yellow'

-    def __init__(self, image_path, output_path, dic_ann, kk, output_types, show=False,
+    def __init__(self, image, output_path, dic_ann, kk, output_types, show=False,
                  draw_kps=False, text=True, legend=True, epistemic=False, z_max=30, fig_width=10):

+        self.im = image
         self.kk = kk
         self.output_types = output_types
         self.show = show
@@ -27,13 +33,9 @@ class Printer:
         self.text = text
         self.epistemic = epistemic
         self.legend = legend
-        self.z_max = z_max # To include ellipses in the image
+        self.z_max = z_max  # To include ellipses in the image
         self.fig_width = fig_width

-        from utils.camera import pixel_to_camera, get_depth
-        self.pixel_to_camera = pixel_to_camera
-        self.get_depth = get_depth
-
         # Define the output dir
         self.path_out = output_path

@@ -52,12 +54,10 @@ class Printer:
         self.uv_shoulders = dic_ann['uv_shoulders']
         self.uv_kps = dic_ann['uv_kps']

-        # Load the image
-        with open(image_path, 'rb') as f:
-            self.im = Image.open(f).convert('RGB')
-
         self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1])
+        self.ww = self.im.size[0]
         self.hh = self.im.size[1]
+        self.radius = 14 / 1600 * self.ww

     def print(self):
         """
@@ -66,22 +66,15 @@ class Printer:
         Either front and/or bird visualization or combined one
         """
         # Parameters
-        radius = 14
-        radius_kps = 6
-        fontsize_bv = 16
-        fontsize = 18
-        textcolor = 'darkorange'
-        color_kps = 'yellow'

         # Resize image for aesthetic proportions in combined visualization
         if 'combined' in self.output_types:
-            ww = self.im.size[0]
-            hh = self.im.size[1]
-            y_scale = ww / (hh * 1.8)  # Defined proportion
-            self.im = self.im.resize((ww, round(hh * y_scale)))
-            print(y_scale)
-            width = self.fig_width + 0.6 * self.fig_width
-            height = self.fig_width * self.im.size[1] / self.im.size[0]
+            y_scale = self.ww / (self.hh * 1.8)  # Defined proportion
+            self.im = self.im.resize((self.ww, round(self.hh * y_scale)))
+            self.ww = self.im.size[0]
+            self.hh = self.im.size[1]
+            fig_width = self.fig_width + 0.6 * self.fig_width
+            fig_height = self.fig_width * self.hh / self.ww

             # Distinguish between KITTI images and general images
             if y_scale > 1.7:
@@ -92,7 +85,7 @@ class Printer:

             ext = '.combined.png'
             fig, (ax1, ax0) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [1, width_ratio]},
-                                           figsize=(width, height))
+                                           figsize=(fig_width, fig_height))
             ax1.set_aspect(fig_ar_1)
             fig.set_tight_layout(True)
             fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -104,7 +97,7 @@ class Printer:
         elif 'front' in self.output_types:
             y_scale = 1
             width = self.fig_width
-            height = self.fig_width * self.im.size[1] / self.im.size[0]
+            height = self.fig_width * self.hh / self.ww

             plt.figure(0)
             fig0, ax0 = plt.subplots(1, 1, figsize=(width, height))
@@ -114,8 +107,8 @@ class Printer:

         if any(xx in self.output_types for xx in ['front', 'combined']):
             ax0.set_axis_off()
-            ax0.set_xlim(0, self.im.size[0])
-            ax0.set_ylim(self.im.size[1], 0)
+            ax0.set_xlim(0, self.ww)
+            ax0.set_ylim(self.hh, 0)
             ax0.imshow(self.im)

             z_min = 0
             bar_ticks = self.z_max // 5 + 1
@@ -125,16 +118,16 @@ class Printer:

             for idx, uv in enumerate(self.uv_shoulders):
                 if self.draw_kps:
-                    ax0 = self.show_kps(ax0, self.uv_kps[idx], y_scale, radius_kps, color_kps)
+                    ax0 = self.show_kps(ax0, self.uv_kps[idx], y_scale, self.RADIUS_KPS, self.COLOR_KPS)

                 elif min(self.zz_pred[idx], self.zz_gt[idx]) > 0:
                     color = cmap((self.zz_pred[idx] % self.z_max) / self.z_max)
-                    circle = Circle((uv[0], uv[1] * y_scale), radius=radius, color=color, fill=True)
+                    circle = Circle((uv[0], uv[1] * y_scale), radius=self.radius, color=color, fill=True)
                     ax0.add_patch(circle)

                     if self.text:
-                        ax0.text(uv[0]+radius, uv[1] * y_scale - radius, str(num),
-                                 fontsize=fontsize, color=textcolor, weight='bold')
+                        ax0.text(uv[0]+self.radius, uv[1] * y_scale - self.radius, str(num),
+                                 fontsize=self.FONTSIZE, color=self.TEXTCOLOR, weight='bold')
                         num += 1

             ax0.get_xaxis().set_visible(False)
@@ -166,7 +159,7 @@ class Printer:
         # Create bird or combine it with front)
         if any(xx in self.output_types for xx in ['bird', 'combined']):
             uv_max = np.array([0, self.hh, 1])
-            xyz_max = self.pixel_to_camera(uv_max, self.kk, self.z_max)
+            xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max)
             x_max = abs(xyz_max[0])  # shortcut to avoid oval circles in case of different kk

             for idx, _ in enumerate(self.xx_gt):
@@ -189,8 +182,8 @@ class Printer:
                                       height=1, angle=angle, color='b', fill=False, label="Aleatoric Uncertainty",
                                       linewidth=1.3)
                 ellipse_var = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale_epi[idx] * 2,
-                                      height=1, angle=angle, color='r', fill=False, label="Uncertainty", linewidth=1,
-                                      linestyle='--')
+                                      height=1, angle=angle, color='r', fill=False, label="Uncertainty",
+                                      linewidth=1, linestyle='--')

                 ax1.add_patch(ellipse_ale)
                 if self.epistemic:
@@ -203,7 +196,7 @@ class Printer:
                 (_, x_pos), (_, z_pos) = get_confidence(self.xx_pred[idx], self.zz_pred[idx], self.stds_ale_epi[idx])

                 if self.text:
-                    ax1.text(x_pos, z_pos, str(num), fontsize=fontsize_bv, color='darkorange')
+                    ax1.text(x_pos, z_pos, str(num), fontsize=self.FONTSIZE_BV, color='darkorange')
                     num += 1

             # To avoid repetitions in the legend
@@ -219,6 +212,10 @@ class Printer:
             ax1.set_xlabel("X [m]")
             ax1.set_ylabel("Z [m]")

+            # To remove axis numbers:
+            # plt.setp([ax1.get_yticklabels() for aa in fig.axes[:-1]], visible=False)
+            # plt.setp([ax1.get_xticklabels() for aa in fig.axes[:-1]], visible=False)
+
         if self.show:
             plt.show()
         else:
@@ -227,9 +224,7 @@ class Printer:
         if self.draw_kps:
             im = cv2.imread(self.path_out + ext)
             im = self.increase_brightness(im, value=30)
-            hh = im.size[1]
-            ww = im.size[0]
-            im_new = im[0:hh, 0:round(ww/1.7)]
+            im_new = im[0:self.hh, 0:round(self.ww / 1.7)]
             cv2.imwrite(self.path_out, im_new)

         plt.close('all')
@@ -243,7 +238,8 @@ class Printer:

         return ax0

-    def increase_brightness(self, img, value=30):
+    @staticmethod
+    def increase_brightness(img, value=30):
         hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
         h, s, v = cv2.split(hsv)
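Printer now receives a PIL image instead of a path; factory_outputs above shows the canonical call. A condensed sketch with hypothetical inputs (dic_out from MonoLoco.forward, kk from factory_for_gt):

from PIL import Image
from visuals.printer import Printer

pil_image = Image.open('docs/002282.png').convert('RGB')  # hypothetical input image

# dic_out comes from MonoLoco.forward and kk from factory_for_gt
printer = Printer(pil_image, 'docs/002282.png', dic_out, kk,
                  output_types=['combined'], z_max=30, epistemic=False)
printer.print()  # writes docs/002282.png.combined.png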