# pylint: disable=too-many-statements, import-error

"""Extract joint annotations and match them with nuScenes ground truths"""

import os
import sys
import time
import math
import copy
import json
import logging
import datetime
from collections import defaultdict

import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from pyquaternion import Quaternion

from ..utils import get_iou_matches, append_cluster, select_categories, project_3d, correct_angle, normalize_hwl, \
    to_spherical
from ..network.process import preprocess_pifpaf, preprocess_monoloco


class PreprocessNuscenes:
    """Preprocess the nuScenes dataset"""
    # Average box dimensions in meters (width, length, height) and their standard
    # deviation, presumably used as dimension priors elsewhere in the project
    AV_W = 0.68
    AV_L = 0.75
    AV_H = 1.72
    WLH_STD = 0.1
    social = False

    CAMERAS = ('CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT')
    # dic_jo accumulates the train/val/test arrays; dic_names maps each image name
    # to its ground-truth boxes, labels and camera intrinsics
    dic_jo = {'train': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
                            clst=defaultdict(lambda: defaultdict(list))),
              'val': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
                          clst=defaultdict(lambda: defaultdict(list))),
              'test': dict(X=[], Y=[], names=[], kps=[], boxes_3d=[], K=[],
                           clst=defaultdict(lambda: defaultdict(list)))
              }
    dic_names = defaultdict(lambda: defaultdict(list))

    def __init__(self, dir_ann, dir_nuscenes, dataset, iou_min):

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        self.iou_min = iou_min
        self.dir_ann = dir_ann
        dir_out = os.path.join('data', 'arrays')
        assert os.path.exists(dir_nuscenes), "nuScenes directory does not exist"
        assert os.path.exists(self.dir_ann), "The annotations directory does not exist"
        assert os.path.exists(dir_out), "Joints directory does not exist"

        now = datetime.datetime.now()
        now_time = now.strftime("%Y%m%d-%H%M")[2:]
        self.path_joints = os.path.join(dir_out, 'joints-' + dataset + '-' + now_time + '.json')
        self.path_names = os.path.join(dir_out, 'names-' + dataset + '-' + now_time + '.json')

        self.nusc, self.scenes, self.split_train, self.split_val = factory(dataset, dir_nuscenes)

    def run(self):
        """Prepare arrays for training"""
        cnt_scenes = cnt_samples = cnt_sd = cnt_ann = 0
        start = time.time()
        for ii, scene in enumerate(self.scenes):
            end_scene = time.time()
            current_token = scene['first_sample_token']
            cnt_scenes += 1
            # start_scene is assigned below, so the estimate is only available from the second scene on
            time_left = str((end_scene - start_scene) / 60 * (len(self.scenes) - ii))[:4] if ii != 0 else "NaN"

            sys.stdout.write('\r' + 'Elaborating scene {}, remaining time {} minutes'
                             .format(cnt_scenes, time_left) + '\t\n')
            start_scene = time.time()
            if scene['name'] in self.split_train:
                phase = 'train'
            elif scene['name'] in self.split_val:
                phase = 'val'
            else:
                print("Scene name not in training or validation split")
                continue

            while current_token != "":
                sample_dic = self.nusc.get('sample', current_token)
                cnt_samples += 1

                # Extract all the sample_data tokens for each sample
                for cam in self.CAMERAS:
                    sd_token = sample_dic['data'][cam]
                    cnt_sd += 1

                    # Extract all the annotations of the person
                    path_im, boxes_obj, kk = self.nusc.get_sample_data(sd_token, box_vis_level=1)  # At least one corner
                    boxes_gt, boxes_3d, ys = extract_ground_truth(boxes_obj, kk)
                    kk = kk.tolist()
                    name = os.path.basename(path_im)
                    basename, _ = os.path.splitext(name)

                    self.dic_names[basename + '.jpg']['boxes'] = copy.deepcopy(boxes_gt)
                    self.dic_names[basename + '.jpg']['ys'] = copy.deepcopy(ys)
                    self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)

                    # Run IoU with pifpaf detections and save
                    path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json')
                    exists = os.path.isfile(path_pif)

                    if exists:
                        with open(path_pif, 'r') as file:
                            annotations = json.load(file)
                            boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
                    else:
                        continue

                    if keypoints:
                        matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
                        for (idx, idx_gt) in matches:
                            keypoint = keypoints[idx:idx + 1]
                            inp = preprocess_monoloco(keypoint, kk).view(-1).tolist()
                            lab = normalize_hwl(ys[idx_gt])
                            self.dic_jo[phase]['kps'].append(keypoint)
                            self.dic_jo[phase]['X'].append(inp)
                            self.dic_jo[phase]['Y'].append(lab)
                            self.dic_jo[phase]['names'].append(name)  # One image name for each annotation
                            self.dic_jo[phase]['boxes_3d'].append(boxes_3d[idx_gt])
                            append_cluster(self.dic_jo, phase, inp, lab, keypoint)
                            cnt_ann += 1
                            sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')

                current_token = sample_dic['next']

        with open(self.path_joints, 'w') as f:
            json.dump(self.dic_jo, f)
        with open(self.path_names, 'w') as f:
            json.dump(self.dic_names, f)
        end = time.time()

        extract_box_average(self.dic_jo['train']['boxes_3d'])
        print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
              .format(cnt_ann, cnt_samples, cnt_scenes, (end - start) / 60))
        print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))


def extract_ground_truth(boxes_obj, kk, spherical=True):
    """Convert nuScenes box objects into 2D boxes, 3D boxes and label vectors"""

    boxes_gt = []
    boxes_3d = []
    ys = []

    for box_obj in boxes_obj:

        # Select the category
        if box_obj.name[:6] != 'animal':
            general_name = box_obj.name.split('.')[0] + '.' + box_obj.name.split('.')[1]
        else:
            general_name = 'animal'
        if general_name in select_categories('all'):

            # Obtain 2D & 3D box
            boxes_gt.append(project_3d(box_obj, kk))
            boxes_3d.append(box_obj.center.tolist() + box_obj.wlh.tolist())

            # Angle
            yaw = quaternion_yaw(box_obj.orientation)
            assert - math.pi <= yaw <= math.pi
            sin, cos, _ = correct_angle(yaw, box_obj.center)
            hwl = [float(box_obj.wlh[i]) for i in (2, 0, 1)]  # reorder (w, l, h) -> (h, w, l)

            # Spherical coordinates
            xyz = list(box_obj.center)
            dd = np.linalg.norm(box_obj.center)
            if spherical:
                rtp = to_spherical(xyz)
                loc = rtp[1:3] + xyz[2:3] + rtp[0:1]  # [theta, psi, z, r]
            else:
                loc = xyz + [dd]

            output = loc + hwl + [sin, cos, yaw]
            ys.append(output)

    return boxes_gt, boxes_3d, ys
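
# For reference, each label appended to `ys` above is a 10-element vector, as
# assembled at the end of the loop:
#   spherical=True:  [theta, psi, z, r, h, w, l, sin, cos, yaw]
#   spherical=False: [x, y, z, d, h, w, l, sin, cos, yaw]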


def factory(dataset, dir_nuscenes):
    """Define dataset type and split training and validation"""

    assert dataset in ['nuscenes', 'nuscenes_mini', 'nuscenes_teaser']
    if dataset == 'nuscenes_mini':
        version = 'v1.0-mini'
    else:
        version = 'v1.0-trainval'

    nusc = NuScenes(version=version, dataroot=dir_nuscenes, verbose=True)
    scenes = nusc.scene

    if dataset == 'nuscenes_teaser':
        with open("splits/nuscenes_teaser_scenes.txt", "r") as file:
            teaser_scenes = file.read().splitlines()
        scenes = [scene for scene in scenes if scene['token'] in teaser_scenes]
        with open("splits/split_nuscenes_teaser.json", "r") as file:
            dic_split = json.load(file)
        split_train = [scene['name'] for scene in scenes if scene['token'] in dic_split['train']]
        split_val = [scene['name'] for scene in scenes if scene['token'] in dic_split['val']]
    else:
        split_scenes = splits.create_splits_scenes()
        split_train, split_val = split_scenes['train'], split_scenes['val']

    return nusc, scenes, split_train, split_val
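
# Example usage (illustrative; the dataroot path is a placeholder):
#   nusc, scenes, split_train, split_val = factory('nuscenes_mini', '/data/sets/nuscenes')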


def quaternion_yaw(q: Quaternion, in_image_frame: bool = True) -> float:
    """Compute the yaw angle of a quaternion, in the image frame or in the world frame"""
    # Rotate the unit x-axis and measure the angle of its projection
    v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
    if in_image_frame:
        yaw = -np.arctan2(v[2], v[0])
    else:
        yaw = np.arctan2(v[1], v[0])
    return float(yaw)
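
# Minimal sanity check for quaternion_yaw (illustrative; this helper is hypothetical
# and not called anywhere in the pipeline). In the image frame the identity rotation
# gives yaw 0, and a +pi/2 rotation about the camera y-axis gives yaw +pi/2:
def _check_quaternion_yaw():
    assert abs(quaternion_yaw(Quaternion(axis=[0, 1, 0], angle=0.0))) < 1e-9
    assert abs(quaternion_yaw(Quaternion(axis=[0, 1, 0], angle=math.pi / 2)) - math.pi / 2) < 1e-6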


def extract_box_average(boxes_3d):
    """Print the mean and standard deviation of the ground-truth box dimensions"""
    boxes_np = np.array(boxes_3d)
    means = np.mean(boxes_np[:, 3:], axis=0)  # columns 3:6 hold the (w, l, h) dimensions
    stds = np.std(boxes_np[:, 3:], axis=0)
    print(means)
    print(stds)


def extract_social(inputs, ys, keypoints, idx, matches):
    """Output a (padded) version of the input with its two closest neighbours
    - Take the ground feet and the output z
    - make them relative to the person (as in Social LSTM)"""
    all_inputs = []

    # Find the lowest relative ground foot
    ground_foot = np.max(np.array(inputs)[:, [31, 33]], axis=1)
    rel_ground_foot = ground_foot - ground_foot[idx]
    rel_ground_foot = rel_ground_foot.tolist()

    # Order the people based on their distance
    base = np.array([np.mean(np.array(keypoints[idx][0])), np.mean(np.array(keypoints[idx][1]))])
    # delta_input = [abs((inp[31] + inp[33]) / 2 - base) for inp in inputs]
    delta_input = [np.linalg.norm(base - np.array([np.mean(np.array(kp[0])), np.mean(np.array(kp[1]))]))
                   for kp in keypoints]
    sorted_indices = sorted(range(len(delta_input)), key=lambda k: delta_input[k])  # List of sorted indices
    all_inputs.extend(inputs[idx])

    indices_idx = [idx_m for (idx_m, _) in matches]
    # Attach the two closest neighbours, padding with zeros when a neighbour
    # is missing or has no ground-truth match
    for ii in range(1, 3):
        try:
            index = sorted_indices[ii]

            # Extract the idx_gt corresponding to the input we are attaching, if it exists
            try:
                idx_idx_gt = indices_idx.index(index)
                idx_gt = matches[idx_idx_gt][1]
                all_inputs.append(rel_ground_foot[index])  # Relative lower ground foot
                all_inputs.append(float(ys[idx_gt][3]))  # Output Z
            except ValueError:
                all_inputs.extend([0.] * 2)
        except IndexError:
            all_inputs.extend([0.] * 2)
    assert len(all_inputs) == 34 + 2 * 2
    return all_inputs
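
# Illustrative shape check (hypothetical helper, not part of the pipeline; assumes
# keypoints are stored per person as [x_list, y_list, confidence_list]): with a single
# person and no neighbours, the output is the 34 input features padded with four zeros.
def _check_extract_social():
    inp = [0.] * 34
    kps = [[[10., 20.], [30., 40.], [1., 1.]]]
    out = extract_social([inp], ys=[], keypoints=kps, idx=0, matches=[])
    assert len(out) == 38 and out[34:] == [0.] * 4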


# def get_jean_yaw(box_obj):
#     b_corners = box_obj.bottom_corners()
#     center = box_obj.center
#     back_point = [(b_corners[0, 2] + b_corners[0, 3]) / 2, (b_corners[2, 2] + b_corners[2, 3]) / 2]
#
#     x = b_corners[0, :] - back_point[0]
#     y = b_corners[2, :] - back_point[1]
#
#     angle = math.atan2((x[0] + x[1]) / 2, (y[0] + y[1]) / 2) * 180 / 3.14
#     angle = (angle + 360) % 360
#     correction = math.atan2(center[0], center[2]) * 180 / 3.14
#     return angle, correction