# monoloco/monstereo/prep/preprocess_nu.py
# pylint: disable=too-many-statements, import-error
"""Extract joints annotations and match with nuScenes ground truths
"""
import os
import sys
import time
import math
import copy
import json
import logging
from collections import defaultdict
import datetime
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from pyquaternion import Quaternion
from ..utils import get_iou_matches, append_cluster, select_categories, project_3d, correct_angle, normalize_hwl, \
to_spherical
from ..network.process import preprocess_pifpaf, preprocess_monoloco
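
# Typical driver (a sketch: the real entry point lives elsewhere in the package,
# and iou_min=0.3 is only an illustrative value):
#     prep = PreprocessNuscenes(dir_ann='data/annotations', dir_nuscenes='data/nuscenes',
#                               dataset='nuscenes', iou_min=0.3)
#     prep.run()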


class PreprocessNuscenes:
"""Preprocess Nuscenes dataset"""
AV_W = 0.68
AV_L = 0.75
AV_H = 1.72
WLH_STD = 0.1
social = False
CAMERAS = ('CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT')
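    # dic_jo collects, per phase, the network inputs (X), labels (Y), image names,
    # keypoints, 3D boxes and camera intrinsics (K); 'clst' groups samples via
    # append_cluster (assumed: clustering by ground-truth distance).
    # dic_names stores the ground truths of every image, keyed by file name.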
dic_jo = {'train': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
clst=defaultdict(lambda: defaultdict(list))),
'val': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
clst=defaultdict(lambda: defaultdict(list))),
'test': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
clst=defaultdict(lambda: defaultdict(list)))
}
dic_names = defaultdict(lambda: defaultdict(list))

    def __init__(self, dir_ann, dir_nuscenes, dataset, iou_min):
logging.basicConfig(level=logging.INFO)
self.logger = logging.getLogger(__name__)
self.iou_min = iou_min
self.dir_ann = dir_ann
dir_out = os.path.join('data', 'arrays')
        assert os.path.exists(dir_nuscenes), "nuScenes directory does not exist"
        assert os.path.exists(self.dir_ann), "The annotations directory does not exist"
        assert os.path.exists(dir_out), "Output directory does not exist"
now = datetime.datetime.now()
now_time = now.strftime("%Y%m%d-%H%M")[2:]
self.path_joints = os.path.join(dir_out, 'joints-' + dataset + '-' + now_time + '.json')
self.path_names = os.path.join(dir_out, 'names-' + dataset + '-' + now_time + '.json')
self.nusc, self.scenes, self.split_train, self.split_val = factory(dataset, dir_nuscenes)

    def run(self):
"""
Prepare arrays for training
"""
cnt_scenes = cnt_samples = cnt_sd = cnt_ann = 0
start = time.time()
        start_scene = start
        for ii, scene in enumerate(self.scenes):
            current_token = scene['first_sample_token']
            cnt_scenes += 1
            # Estimate the remaining time from the duration of the previous scene
            if ii:
                time_left = '{:.1f}'.format((time.time() - start_scene) / 60 * (len(self.scenes) - ii))
            else:
                time_left = 'NaN'
            sys.stdout.write('\r' + 'Elaborating scene {}, remaining time {} minutes'
                             .format(cnt_scenes, time_left) + '\t\n')
            start_scene = time.time()
if scene['name'] in self.split_train:
phase = 'train'
elif scene['name'] in self.split_val:
phase = 'val'
else:
print("phase name not in training or validation split")
continue
            while current_token:
sample_dic = self.nusc.get('sample', current_token)
cnt_samples += 1
# Extract all the sample_data tokens for each sample
for cam in self.CAMERAS:
sd_token = sample_dic['data'][cam]
cnt_sd += 1
# Extract all the annotations of the person
path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1) # At least one corner
boxes_gt, boxes_3d, ys = extract_ground_truth(boxes_obj, kk)
kk = kk.tolist()
name = os.path.basename(path_im)
basename, _ = os.path.splitext(name)
self.dic_names[basename + '.jpg']['boxes'] = copy.deepcopy(boxes_gt)
self.dic_names[basename + '.jpg']['ys'] = copy.deepcopy(ys)
self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)
# Run IoU with pifpaf detections and save
                path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json')
                if not os.path.isfile(path_pif):
                    continue
                with open(path_pif, 'r') as file:
                    annotations = json.load(file)
                boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
if keypoints:
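                    # (assumed) get_iou_matches greedily pairs pifpaf detections with
                    # ground-truth boxes whose IoU reaches self.iou_min and returns
                    # (detection index, ground-truth index) tuples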
matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
for (idx, idx_gt) in matches:
                        keypoint = keypoints[idx:idx + 1]  # keep the batch dimension
                        inp = preprocess_monoloco(keypoint, kk).view(-1).tolist()  # flattened 34-dim input
lab = ys[idx_gt]
lab = normalize_hwl(lab)
self.dic_jo[phase]['kps'].append(keypoint)
self.dic_jo[phase]['X'].append(inp)
self.dic_jo[phase]['Y'].append(lab)
self.dic_jo[phase]['names'].append(name) # One image name for each annotation
self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt])
append_cluster(self.dic_jo, phase, inp, lab, keypoint)
cnt_ann += 1
sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')
current_token = sample_dic['next']
        with open(self.path_joints, 'w') as f:
            json.dump(self.dic_jo, f)
        with open(self.path_names, 'w') as f:
json.dump(self.dic_names, f)
end = time.time()
extract_box_average(self.dic_jo['train']['boxes_3d'])
print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
.format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60))
print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))


def extract_ground_truth(boxes_obj, kk, spherical=True):
    """Return, for every box of a selected category, the projected 2D box, the
    3D box (center + wlh) and the 10-element label: four location terms
    ([theta, psi, z, r] if spherical, else [x, y, z, d]), followed by
    [h, w, l] and [sin, cos, yaw] of the corrected angle"""
boxes_gt = []
boxes_3d = []
ys = []
for box_obj in boxes_obj:
# Select category
        if box_obj.name[:6] != 'animal':
            general_name = '.'.join(box_obj.name.split('.')[:2])  # e.g. 'human.pedestrian'
        else:
            general_name = 'animal'
if general_name in select_categories('all'):
# Obtain 2D & 3D box
boxes_gt.append(project_3d(box_obj, kk))
boxes_3d.append(box_obj.center.tolist() + box_obj.wlh.tolist())
# Angle
yaw = quaternion_yaw(box_obj.orientation)
assert - math.pi <= yaw <= math.pi
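            # (assumed) correct_angle converts the global yaw into the egocentric
            # observation angle relative to the camera ray through the box center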
sin, cos, _ = correct_angle(yaw, box_obj.center)
hwl = [float(box_obj.wlh[i]) for i in (2, 0, 1)]
# Spherical coordinates
xyz = list(box_obj.center)
dd = np.linalg.norm(box_obj.center)
if spherical:
rtp = to_spherical(xyz)
loc = rtp[1:3] + xyz[2:3] + rtp[0:1] # [theta, psi, z, r]
else:
loc = xyz + [dd]
output = loc + hwl + [sin, cos, yaw]
ys.append(output)
return boxes_gt, boxes_3d, ys


def factory(dataset, dir_nuscenes):
"""Define dataset type and split training and validation"""
assert dataset in ['nuscenes', 'nuscenes_mini', 'nuscenes_teaser']
if dataset == 'nuscenes_mini':
version = 'v1.0-mini'
else:
version = 'v1.0-trainval'
nusc = NuScenes(version=version, dataroot=dir_nuscenes, verbose=True)
scenes = nusc.scene
if dataset == 'nuscenes_teaser':
with open("splits/nuscenes_teaser_scenes.txt", "r") as file:
teaser_scenes = file.read().splitlines()
scenes = [scene for scene in scenes if scene['token'] in teaser_scenes]
with open("splits/split_nuscenes_teaser.json", "r") as file:
dic_split = json.load(file)
split_train = [scene['name'] for scene in scenes if scene['token'] in dic_split['train']]
split_val = [scene['name'] for scene in scenes if scene['token'] in dic_split['val']]
else:
split_scenes = splits.create_splits_scenes()
split_train, split_val = split_scenes['train'], split_scenes['val']
return nusc, scenes, split_train, split_val


def quaternion_yaw(q: Quaternion, in_image_frame: bool = True) -> float:
    """Extract the yaw of a quaternion by rotating the x-axis and projecting it
    onto the x-z plane (image frame) or the x-y plane (world frame)"""
    v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
    if in_image_frame:
        yaw = -np.arctan2(v[2], v[0])
    else:
        yaw = np.arctan2(v[1], v[0])
    return float(yaw)
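
# Sanity check (illustrative): a +pi/2 rotation about the camera y-axis maps the
# x-axis onto -z, so in the image frame the yaw comes out as +pi/2:
#     quaternion_yaw(Quaternion(axis=[0, 1, 0], angle=math.pi / 2))  # ~ pi / 2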


def extract_box_average(boxes_3d):
    """Print mean and standard deviation of the ground-truth (w, l, h) dimensions"""
    boxes_np = np.array(boxes_3d)
    means = np.mean(boxes_np[:, 3:], axis=0)  # columns 3: hold (w, l, h)
    stds = np.std(boxes_np[:, 3:], axis=0)
    print("Mean (w, l, h):", means)
    print("Std (w, l, h):", stds)


def extract_social(inputs, ys, keypoints, idx, matches):
    """Output a zero-padded version of the input extended with the two closest
    neighbours:
    - take the relative lowest ground foot and the label distance of each neighbour
    - make them relative to the person (as in Social LSTM)"""
    all_inputs = []

    # Find the lowest ground foot relative to the current person
    # (assumed: indices 31 and 33 are the y-coordinates of the two ankles
    # in the flattened 34-dim input)
    ground_foot = np.max(np.array(inputs)[:, [31, 33]], axis=1)
    rel_ground_foot = (ground_foot - ground_foot[idx]).tolist()

    # Order the people by the distance between the mean points of their keypoints
    # and those of the current person
    base = np.array([np.mean(np.array(keypoints[idx][0])), np.mean(np.array(keypoints[idx][1]))])
    delta_input = [np.linalg.norm(base - np.array([np.mean(np.array(kp[0])), np.mean(np.array(kp[1]))]))
                   for kp in keypoints]
    sorted_indices = sorted(range(len(delta_input)), key=lambda k: delta_input[k])

    all_inputs.extend(inputs[idx])
    indices_det = [idx_det for (idx_det, _) in matches]
    for ii in range(1, 3):
        try:
            index = sorted_indices[ii]
        except IndexError:  # fewer than two neighbours: zero-pad
            all_inputs.extend([0.] * 2)
            continue
        # Extract the idx_gt corresponding to the neighbour, if it was matched
        try:
            idx_gt = matches[indices_det.index(index)][1]
            all_inputs.append(rel_ground_foot[index])  # relative lowest ground foot
            all_inputs.append(float(ys[idx_gt][3]))  # label distance (index 3 is r, or d if not spherical)
        except ValueError:  # the neighbour has no ground-truth match: zero-pad
            all_inputs.extend([0.] * 2)

    assert len(all_inputs) == 34 + 2 * 2  # base input + 2 neighbours x 2 features
    return all_inputs
# def get_jean_yaw(box_obj):
# b_corners = box_obj.bottom_corners()
# center = box_obj.center
# back_point = [(b_corners[0, 2] + b_corners[0, 3]) / 2, (b_corners[2, 2] + b_corners[2, 3]) / 2]
#
# x = b_corners[0, :] - back_point[0]
# y = b_corners[2, :] - back_point[1]
#
# angle = math.atan2((x[0] + x[1]) / 2, (y[0] + y[1]) / 2) * 180 / 3.14
# angle = (angle + 360) % 360
# correction = math.atan2(center[0], center[2]) * 180 / 3.14
# return angle, correction