import json
import os

import numpy as np
import torch
import torchvision

from ..utils import (get_keypoints, pixel_to_camera, to_cartesian,
                     back_correct_angles, open_annotations)

BF = 0.54 * 721  # KITTI stereo baseline (m) * focal length (pixels)
z_min = 4
z_max = 60
D_MIN = BF / z_max  # disparity bounds corresponding to the admissible depth range
D_MAX = BF / z_min


def preprocess_monstereo(keypoints, keypoints_r, kk):
    """
    Combine left and right keypoints in all-vs-all settings
    """
    clusters = []
    inputs_l = preprocess_monoloco(keypoints, kk)
    inputs_r = preprocess_monoloco(keypoints_r, kk)

    inputs = torch.empty((0, 68)).to(inputs_l.device)
    for idx, inp_l in enumerate(inputs_l.split(1)):
        clst = 0
        # inp_l = torch.cat((inp_l, cat[:, idx:idx+1]), dim=1)
        for idx_r, inp_r in enumerate(inputs_r.split(1)):
            # if D_MIN < avg_disparities[idx_r] < D_MAX:  # Check the range of disparities
            inp = torch.cat((inp_l, inp_l - inp_r), dim=1)  # (1, 68)
            inputs = torch.cat((inputs, inp), dim=0)
            clst += 1
        clusters.append(clst)
    return inputs, clusters


def preprocess_monoloco(keypoints, kk, zero_center=False):
    """
    Preprocess batches of inputs
    keypoints = torch tensors of (m, 3, 17) or list [3, 17]
    Outputs = torch tensors of (m, 34) in meters, normalized (z=1) and,
    if zero_center is set, zero-centered using the center of the box
    """
    if isinstance(keypoints, list):
        keypoints = torch.tensor(keypoints)
    if isinstance(kk, list):
        kk = torch.tensor(kk)

    # Projection in normalized image coordinates and zero-center with the center of the bounding box
    uv_center = get_keypoints(keypoints, mode='center')
    xy1_center = pixel_to_camera(uv_center, kk, 10)
    xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
    if zero_center:
        kps_norm = xy1_all - xy1_center.unsqueeze(1)  # (m, 17, 3) - (m, 1, 3)
    else:
        kps_norm = xy1_all
    kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1)  # no contiguous for view
    # kps_out = torch.cat((kps_out, keypoints[:, 2, :]), dim=1)
    return kps_out


def factory_for_gt(im_size, name=None, path_gt=None, verbose=True):
    """Look for the ground-truth annotations file and define the calibration matrix based on the image size"""
    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        if verbose:
            print('-' * 120 + "\nGround-truth file opened")
    except (FileNotFoundError, TypeError):
        if verbose:
            print('-' * 120 + "\nGround-truth file not found")
        dic_names = {}

    try:
        kk = dic_names[name]['K']
        dic_gt = dic_names[name]
        if verbose:
            print("Matched ground-truth file!")
    except KeyError:
        dic_gt = None
        x_factor = im_size[0] / 1600
        y_factor = im_size[1] / 900
        pixel_factor = (x_factor + y_factor) / 2  # 1.7 for MOT
        # pixel_factor = 1
        if im_size[0] / im_size[1] > 2.5:  # KITTI-like aspect ratio
            kk = [[718.3351, 0., 600.3891],
                  [0., 718.3351, 181.5122],
                  [0., 0., 1.]]  # KITTI calibration
        else:
            kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
                  [0., 1266.4 * pixel_factor, 491.5 * y_factor],
                  [0., 0., 1.]]  # nuScenes calibration
        if verbose:
            print("Using a standard calibration matrix...")

    return kk, dic_gt


def laplace_sampling(outputs, n_samples):
    """Sample from the Laplace distributions defined by (mu, bi) in the first two output columns"""
    torch.manual_seed(1)
    mu = outputs[:, 0]
    bi = torch.abs(outputs[:, 1])

    # Analytical alternative:
    # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
    # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))

    # Sampling on the same device as the outputs
    laplace = torch.distributions.Laplace(mu, bi)
    xx = laplace.sample((n_samples,)).to(outputs.device)
    return xx


def unnormalize_bi(loc):
    """
    Unnormalize the relative spread bi of the Laplace distribution
    Input --> tensor of (m, 2)
    """
    assert loc.size()[1] == 2, "size of the output tensor should be (m, 2)"
    bi = torch.exp(loc[:, 1:2]) * loc[:, 0:1]
    return bi
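
# Illustrative sketch (not called by the module): how the two Laplace helpers
# above fit together. `loc` is assumed to hold the predicted distance mu and
# the normalized log-spread, as expected by `unnormalize_bi`:
#
#     loc = torch.tensor([[10.0, -1.5], [25.0, -0.7]])                # (m, 2)
#     bi = unnormalize_bi(loc)                                        # (m, 1), ~[2.23, 12.41]
#     samples = laplace_sampling(torch.cat((loc[:, 0:1], bi), dim=1),
#                                n_samples=100)                       # (100, m)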
def preprocess_mask(dir_ann, basename, mode='left'):
    dir_ann = os.path.join(os.path.split(dir_ann)[0], 'mask')
    if mode == 'left':
        path_ann = os.path.join(dir_ann, basename + '.json')
    elif mode == 'right':
        path_ann = os.path.join(dir_ann + '_right', basename + '.json')
    else:
        raise ValueError("mode should be 'left' or 'right'")

    dic = open_annotations(path_ann)
    if isinstance(dic, list):  # no annotations found
        return [], []

    keypoints = []
    for kps in dic['keypoints']:
        kps = prepare_pif_kps(np.array(kps).reshape(51,).tolist())
        keypoints.append(kps)
    return dic['boxes'], keypoints


def preprocess_pifpaf(annotations, im_size=None, enlarge_boxes=True, min_conf=0.):
    """
    Preprocess pifpaf annotations:
    1. Enlarge the box by 10%
    2. Constrain it inside the image (if im_size is provided)
    """
    boxes = []
    keypoints = []
    enlarge = 1 if enlarge_boxes else 2  # Halve the enlargement for social distancing

    for dic in annotations:
        kps = prepare_pif_kps(dic['keypoints'])
        box = dic['bbox']
        try:
            conf = dic['score']
            # Enlarge boxes
            delta_h = (box[3]) / (10 * enlarge)
            delta_w = (box[2]) / (5 * enlarge)
            # from width-height to corners
            box[2] += box[0]
            box[3] += box[1]

        except KeyError:
            all_confs = np.array(kps[2])
            score_weights = np.ones(17)
            score_weights[:3] = 3.0
            score_weights[5:] = 0.1
            # conf = np.sum(score_weights * np.sort(all_confs)[::-1])
            conf = float(np.mean(all_confs))
            # Add 15% for y and 20% for x
            delta_h = (box[3] - box[1]) / (7 * enlarge)
            delta_w = (box[2] - box[0]) / (3.5 * enlarge)

        assert delta_h > -5 and delta_w > -5, "Bounding box <= 0"

        box[0] -= delta_w
        box[1] -= delta_h
        box[2] += delta_w
        box[3] += delta_h

        # Put the box inside the image
        if im_size is not None:
            box[0] = max(0, box[0])
            box[1] = max(0, box[1])
            box[2] = min(box[2], im_size[0])
            box[3] = min(box[3], im_size[1])

        if conf >= min_conf:
            box.append(conf)
            boxes.append(box)
            keypoints.append(kps)

    return boxes, keypoints


def prepare_pif_kps(kps_in):
    """Convert from a flat list of 51 values to a list of (3, 17): [xxs, yys, ccs]"""
    assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
    xxs = kps_in[0:][::3]
    yys = kps_in[1:][::3]  # from offset 1, every 3
    ccs = kps_in[2:][::3]
    return [xxs, yys, ccs]


def image_transform(image):
    normalize = torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
    transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), normalize])
    return transforms(image)


def extract_outputs(outputs, tasks=()):
    """
    Extract the outputs for multi-task training and predictions
    Inputs:
        tensor (m, 10), or (m, 9) for monoloco
    Outputs:
        - if tasks are provided, an ordered list of raw tensors
        - else a dictionary with processed outputs
    """
    dic_out = {'x': outputs[:, 0:1],
               'y': outputs[:, 1:2],
               'd': outputs[:, 2:4],
               'h': outputs[:, 4:5],
               'w': outputs[:, 5:6],
               'l': outputs[:, 6:7],
               'ori': outputs[:, 7:9]}

    if outputs.shape[1] == 10:
        dic_out['aux'] = outputs[:, 9:10]

    # Multi-task training
    if len(tasks) >= 1:
        assert isinstance(tasks, tuple), "tasks need to be a tuple"
        return [dic_out[task] for task in tasks]

    # Preprocess the tensor
    # AV_H, AV_W, AV_L, HWL_STD = 1.72, 0.75, 0.68, 0.1
    bi = unnormalize_bi(dic_out['d'])
    dic_out['bi'] = bi

    dic_out = {key: el.detach().cpu() for key, el in dic_out.items()}
    x = to_cartesian(outputs[:, 0:3].detach().cpu(), mode='x')
    y = to_cartesian(outputs[:, 0:3].detach().cpu(), mode='y')
    d = dic_out['d'][:, 0:1]
    z = torch.sqrt(d**2 - x**2 - y**2)
    dic_out['xyzd'] = torch.cat((x, y, z, d), dim=1)
    dic_out.pop('d')
    dic_out.pop('x')
    dic_out.pop('y')
    dic_out['d'] = d

    yaw_pred = torch.atan2(dic_out['ori'][:, 0:1], dic_out['ori'][:, 1:2])
    yaw_orig = back_correct_angles(yaw_pred, dic_out['xyzd'][:, 0:3])
    dic_out['yaw'] = (yaw_pred, yaw_orig)  # alpha, ry

    if outputs.shape[1] == 10:
        dic_out['aux'] = torch.sigmoid(dic_out['aux'])
    return dic_out
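
# Sketch of the geometry assumed in `extract_outputs` above: the network
# predicts the radial distance d together with angular components,
# `to_cartesian` recovers x and y, and z follows from d**2 = x**2 + y**2 + z**2:
#
#     d, x, y = 10.0, 2.0, -1.0
#     z = (d**2 - x**2 - y**2) ** 0.5    # sqrt(95) ~ 9.747
#     assert abs((x**2 + y**2 + z**2) ** 0.5 - d) < 1e-9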
def extract_labels_aux(labels, tasks=None):
    dic_gt_out = {'aux': labels[:, 0:1]}

    if tasks is not None:
        assert isinstance(tasks, tuple), "tasks need to be a tuple"
        return [dic_gt_out[task] for task in tasks]

    dic_gt_out = {key: el.detach().cpu() for key, el in dic_gt_out.items()}
    return dic_gt_out


def extract_labels(labels, tasks=None):
    dic_gt_out = {'x': labels[:, 0:1], 'y': labels[:, 1:2], 'z': labels[:, 2:3], 'd': labels[:, 3:4],
                  'h': labels[:, 4:5], 'w': labels[:, 5:6], 'l': labels[:, 6:7],
                  'ori': labels[:, 7:9], 'aux': labels[:, 10:11]}

    if tasks is not None:
        assert isinstance(tasks, tuple), "tasks need to be a tuple"
        return [dic_gt_out[task] for task in tasks]

    dic_gt_out = {key: el.detach().cpu() for key, el in dic_gt_out.items()}
    return dic_gt_out


def cluster_outputs(outputs, clusters):
    """Cluster the outputs based on the number of right keypoints"""

    # Check for the "no right keypoints" condition
    if clusters == 0:
        clusters = max(1, round(outputs.shape[0] / 2))

    assert outputs.shape[0] % clusters == 0, "Unexpected number of inputs"
    outputs = outputs.view(-1, clusters, outputs.shape[1])
    return outputs


def filter_outputs(outputs):
    """Extract a single output for each left keypoint"""

    # Max of the auxiliary task
    val = outputs[:, :, -1]
    best_val, _ = val.max(dim=1, keepdim=True)
    mask = val >= best_val
    output = outputs[mask]  # broadcasting happens only if the 3rd dim is not present
    return output, mask


def extract_outputs_mono(outputs, tasks=None):
    """
    Extract the outputs of the monocular network for multi-task training and predictions
    Inputs:
        tensor (m, 10), or (m, 9) for monoloco
    Outputs:
        - if tasks are provided, an ordered list of raw tensors
        - else a dictionary with processed outputs
    """
    dic_out = {'xyz': outputs[:, 0:3],
               'zb': outputs[:, 2:4],
               'h': outputs[:, 4:5],
               'w': outputs[:, 5:6],
               'l': outputs[:, 6:7],
               'ori': outputs[:, 7:9]}

    # Multi-task training
    if tasks is not None:
        assert isinstance(tasks, tuple), "tasks need to be a tuple"
        return [dic_out[task] for task in tasks]

    # Preprocess the tensor
    bi = unnormalize_bi(dic_out['zb'])
    dic_out = {key: el.detach().cpu() for key, el in dic_out.items()}

    dd = torch.norm(dic_out['xyz'], p=2, dim=1).view(-1, 1)
    dic_out['xyzd'] = torch.cat((dic_out['xyz'], dd), dim=1)
    dic_out['d'], dic_out['bi'] = dd, bi

    yaw_pred = torch.atan2(dic_out['ori'][:, 0:1], dic_out['ori'][:, 1:2])
    yaw_orig = back_correct_angles(yaw_pred, dic_out['xyzd'][:, 0:3])
    dic_out['yaw'] = (yaw_pred, yaw_orig)  # alpha, ry
    return dic_out
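
# End-to-end sketch (assumed wiring, not part of this module) of the stereo
# all-vs-all helpers. `model` is a hypothetical network mapping the
# (n_l * n_r, 68) inputs to (n_l * n_r, 10) outputs whose last column is the
# auxiliary association score. With the disparity filter above commented out,
# every entry of `clusters` equals the number of right instances n_r:
#
#     inputs, clusters = preprocess_monstereo(keypoints, keypoints_r, kk)
#     outputs = model(inputs)                          # (n_l * n_r, 10)
#     grouped = cluster_outputs(outputs, clusters[0])  # (n_l, n_r, 10)
#     best, mask = filter_outputs(grouped)             # one row per left instance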