# pylint: disable=too-many-statements, import-error
"""Extract joints annotations and match with nuScenes ground truths
"""

import os
import sys
import time
import math
import copy
import json
import logging
from collections import defaultdict
import datetime

import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from pyquaternion import Quaternion

from ..utils import get_iou_matches, append_cluster, select_categories, project_3d, correct_angle, normalize_hwl, \
    to_spherical
from ..network.process import preprocess_pifpaf, preprocess_monoloco


class PreprocessNuscenes:
    """Preprocess Nuscenes dataset"""
    AV_W = 0.68
    AV_L = 0.75
    AV_H = 1.72
    WLH_STD = 0.1
    social = False

    CAMERAS = ('CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT')
    dic_jo = {'train': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
                            clst=defaultdict(lambda: defaultdict(list))),
              'val': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
                          clst=defaultdict(lambda: defaultdict(list))),
              'test': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
                           clst=defaultdict(lambda: defaultdict(list)))
              }
    dic_names = defaultdict(lambda: defaultdict(list))

    def __init__(self, dir_ann, dir_nuscenes, dataset, iou_min):

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        self.iou_min = iou_min
        self.dir_ann = dir_ann
        dir_out = os.path.join('data', 'arrays')
        assert os.path.exists(dir_nuscenes), "Nuscenes directory does not exist"
        assert os.path.exists(self.dir_ann), "The annotations directory does not exist"
        assert os.path.exists(dir_out), "Joints directory does not exist"

        now = datetime.datetime.now()
        now_time = now.strftime("%Y%m%d-%H%M")[2:]
        self.path_joints = os.path.join(dir_out, 'joints-' + dataset + '-' + now_time + '.json')
        self.path_names = os.path.join(dir_out, 'names-' + dataset + '-' + now_time + '.json')
        self.nusc, self.scenes, self.split_train, self.split_val = factory(dataset, dir_nuscenes)

    def run(self):
        """
        Prepare arrays for training
        """
        cnt_scenes = cnt_samples = cnt_sd = cnt_ann = 0
        start = time.time()
        for ii, scene in enumerate(self.scenes):
            end_scene = time.time()
            current_token = scene['first_sample_token']
            cnt_scenes += 1
            time_left = str((end_scene - start_scene) / 60 * (len(self.scenes) - ii))[:4] if ii != 0 else "NaN"

            sys.stdout.write('\r' + 'Elaborating scene {}, remaining time {} minutes'
                             .format(cnt_scenes, time_left) + '\t\n')
            start_scene = time.time()
            if scene['name'] in self.split_train:
                phase = 'train'
            elif scene['name'] in self.split_val:
                phase = 'val'
            else:
                print("phase name not in training or validation split")
                continue

            while not current_token == "":
                sample_dic = self.nusc.get('sample', current_token)
                cnt_samples += 1

                # Extract all the sample_data tokens for each sample
                for cam in self.CAMERAS:
                    sd_token = sample_dic['data'][cam]
                    cnt_sd += 1

                    # Extract all the annotations of the person
                    path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1)  # At least one corner
                    boxes_gt, boxes_3d, ys = extract_ground_truth(boxes_obj, kk)
                    kk = kk.tolist()
                    name = os.path.basename(path_im)
                    basename, _ = os.path.splitext(name)

                    self.dic_names[basename + '.jpg']['boxes'] = copy.deepcopy(boxes_gt)
                    self.dic_names[basename + '.jpg']['ys'] = copy.deepcopy(ys)
                    self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)

                    # Run IoU with pifpaf detections and save
                    path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json')
                    exists = os.path.isfile(path_pif)

                    if exists:
                        with open(path_pif, 'r') as file:
                            annotations = json.load(file)
                            boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
                    else:
                        continue

                    if keypoints:
                        matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
                        for (idx, idx_gt) in matches:
                            keypoint = keypoints[idx:idx + 1]
                            inp = preprocess_monoloco(keypoint, kk).view(-1).tolist()
                            lab = ys[idx_gt]
                            lab = normalize_hwl(lab)
                            self.dic_jo[phase]['kps'].append(keypoint)
                            self.dic_jo[phase]['X'].append(inp)
                            self.dic_jo[phase]['Y'].append(lab)
                            self.dic_jo[phase]['names'].append(name)  # One image name for each annotation
                            self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt])
                            append_cluster(self.dic_jo, phase, inp, lab, keypoint)
                            cnt_ann += 1
                            sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')

                current_token = sample_dic['next']

        with open(os.path.join(self.path_joints), 'w') as f:
            json.dump(self.dic_jo, f)
        with open(os.path.join(self.path_names), 'w') as f:
            json.dump(self.dic_names, f)
        end = time.time()

        extract_box_average(self.dic_jo['train']['boxes_3d'])
        print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
              .format(cnt_ann, cnt_samples, cnt_scenes, (end - start) / 60))
        print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))


def extract_ground_truth(boxes_obj, kk, spherical=True):
    boxes_gt = []
    boxes_3d = []
    ys = []

    for box_obj in boxes_obj:
        # Select category
        if box_obj.name[:6] != 'animal':
            general_name = box_obj.name.split('.')[0] + '.' + box_obj.name.split('.')[1]
        else:
            general_name = 'animal'
        if general_name in select_categories('all'):

            # Obtain 2D & 3D box
            boxes_gt.append(project_3d(box_obj, kk))
            boxes_3d.append(box_obj.center.tolist() + box_obj.wlh.tolist())

            # Angle
            yaw = quaternion_yaw(box_obj.orientation)
            assert - math.pi <= yaw <= math.pi
            sin, cos, _ = correct_angle(yaw, box_obj.center)
            hwl = [float(box_obj.wlh[i]) for i in (2, 0, 1)]

            # Spherical coordinates
            xyz = list(box_obj.center)
            dd = np.linalg.norm(box_obj.center)
            if spherical:
                rtp = to_spherical(xyz)
                loc = rtp[1:3] + xyz[2:3] + rtp[0:1]  # [theta, psi, z, r]
            else:
                loc = xyz + [dd]

            output = loc + hwl + [sin, cos, yaw]
            ys.append(output)
    return boxes_gt, boxes_3d, ys


def factory(dataset, dir_nuscenes):
    """Define dataset type and split training and validation"""

    assert dataset in ['nuscenes', 'nuscenes_mini', 'nuscenes_teaser']
    if dataset == 'nuscenes_mini':
        version = 'v1.0-mini'
    else:
        version = 'v1.0-trainval'

    nusc = NuScenes(version=version, dataroot=dir_nuscenes, verbose=True)
    scenes = nusc.scene

    if dataset == 'nuscenes_teaser':
        with open("splits/nuscenes_teaser_scenes.txt", "r") as file:
            teaser_scenes = file.read().splitlines()
        scenes = [scene for scene in scenes if scene['token'] in teaser_scenes]
        with open("splits/split_nuscenes_teaser.json", "r") as file:
            dic_split = json.load(file)
        split_train = [scene['name'] for scene in scenes if scene['token'] in dic_split['train']]
        split_val = [scene['name'] for scene in scenes if scene['token'] in dic_split['val']]
    else:
        split_scenes = splits.create_splits_scenes()
        split_train, split_val = split_scenes['train'], split_scenes['val']

    return nusc, scenes, split_train, split_val


def quaternion_yaw(q: Quaternion, in_image_frame: bool = True) -> float:
    if in_image_frame:
        v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
        yaw = -np.arctan2(v[2], v[0])
    else:
        v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
        yaw = np.arctan2(v[1], v[0])
    return float(yaw)


def extract_box_average(boxes_3d):
    boxes_np = np.array(boxes_3d)
    means = np.mean(boxes_np[:, 3:], axis=0)
    stds = np.std(boxes_np[:, 3:], axis=0)
    print(means)
    print(stds)


def extract_social(inputs, ys, keypoints, idx, matches):
    """Output a (padded) version with all the 5 neighbours
    - Take the ground feet and the output z
    - make relative to the person (as social LSTM)"""

    all_inputs = []
    # Find the lowest relative ground foot
    ground_foot = np.max(np.array(inputs)[:, [31, 33]], axis=1)
    rel_ground_foot = ground_foot - ground_foot[idx]
    rel_ground_foot = rel_ground_foot.tolist()

    # Order the people based on their distance from the reference person
    base = np.array([np.mean(np.array(keypoints[idx][0])), np.mean(np.array(keypoints[idx][1]))])
    # delta_input = [abs((inp[31] + inp[33]) / 2 - base) for inp in inputs]
    delta_input = [np.linalg.norm(base - np.array([np.mean(np.array(kp[0])), np.mean(np.array(kp[1]))]))
                   for kp in keypoints]
    sorted_indices = sorted(range(len(delta_input)), key=lambda k: delta_input[k])  # Return a list of sorted indices
    all_inputs.extend(inputs[idx])
    indices_idx = [idx for (idx, idx_gt) in matches]

    # Append the relative ground foot and the output z of the 2 closest neighbours (zero-padded if missing)
    for ii in range(1, 3):
        try:
            index = sorted_indices[ii]
            # Extract the idx_gt corresponding to the input we are attaching if it exists
            try:
                idx_idx_gt = indices_idx.index(index)
                idx_gt = matches[idx_idx_gt][1]
                all_inputs.append(rel_ground_foot[index])  # Relative lower ground foot
                all_inputs.append(float(ys[idx_gt][3]))  # Output Z
            except ValueError:
                all_inputs.extend([0.] * 2)
        except IndexError:
            all_inputs.extend([0.] * 2)

    assert len(all_inputs) == 34 + 2 * 2
    return all_inputs


# def get_jean_yaw(box_obj):
#     b_corners = box_obj.bottom_corners()
#     center = box_obj.center
#     back_point = [(b_corners[0, 2] + b_corners[0, 3]) / 2, (b_corners[2, 2] + b_corners[2, 3]) / 2]
#
#     x = b_corners[0, :] - back_point[0]
#     y = b_corners[2, :] - back_point[1]
#
#     angle = math.atan2((x[0] + x[1]) / 2, (y[0] + y[1]) / 2) * 180 / 3.14
#     angle = (angle + 360) % 360
#     correction = math.atan2(center[0], center[2]) * 180 / 3.14
#     return angle, correction
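

# A minimal usage sketch (not part of the original module). It assumes this file is importable as
# part of its package so that the relative imports above resolve; the directory paths, the module
# name, and the iou_min value below are placeholders rather than values prescribed by the source.
#
#     from .preprocess_nu import PreprocessNuscenes  # hypothetical module name
#
#     prep = PreprocessNuscenes(dir_ann='data/annotations',    # folder with *.pifpaf.json detections
#                               dir_nuscenes='data/nuscenes',  # nuScenes dataroot
#                               dataset='nuscenes',            # or 'nuscenes_mini' / 'nuscenes_teaser'
#                               iou_min=0.3)                   # placeholder IoU matching threshold
#     prep.run()  # writes joints-<dataset>-<time>.json and names-<dataset>-<time>.json into data/arrays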