import math

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image


def pixel_to_camera(uv_tensor, kk, z_met):
    """
    Convert a tensor in pixel coordinates to absolute camera coordinates.
    It accepts lists or torch/numpy tensors of (m, 2) or (m, x, 2),
    where x is the number of keypoints
    """
    if isinstance(uv_tensor, (list, np.ndarray)):
        uv_tensor = torch.tensor(uv_tensor)
    if isinstance(kk, list):
        kk = torch.tensor(kk)
    if uv_tensor.size()[-1] != 2:
        uv_tensor = uv_tensor.permute(0, 2, 1)  # permute to have 2 as last dim to be padded
    assert uv_tensor.size()[-1] == 2, "Tensor size not recognized"
    uv_padded = F.pad(uv_tensor, pad=(0, 1), mode="constant", value=1)  # pad only the last dim with value 1

    kk_1 = torch.inverse(kk)
    xyz_met_norm = torch.matmul(uv_padded, kk_1.t())  # more general than torch.mm
    xyz_met = xyz_met_norm * z_met
    return xyz_met


def project_to_pixels(xyz, kk):
    """Project a single point in space into the image"""
    xx, yy, zz = np.dot(kk, xyz)
    uu = round(xx / zz)
    vv = round(yy / zz)
    return [uu, vv]


def project_3d(box_obj, kk):
    """
    Project a 3D bounding box into the image plane using the central corners
    """
    box_2d = []

    # Obtain the 3D points of the box
    xc, yc, zc = box_obj.center
    ww, _, hh = box_obj.wlh

    # Points corresponding to a box at the z of the center
    x1 = xc - ww / 2
    y1 = yc - hh / 2  # Y axis directed below
    x2 = xc + ww / 2
    y2 = yc + hh / 2
    xyz1 = np.array([x1, y1, zc])
    xyz2 = np.array([x2, y2, zc])
    corners_3d = np.array([xyz1, xyz2])

    # Project them and convert into pixel coordinates
    for xyz in corners_3d:
        xx, yy, zz = np.dot(kk, xyz)
        uu = xx / zz
        vv = yy / zz
        box_2d.append(uu)
        box_2d.append(vv)
    return box_2d


def get_keypoints(keypoints, mode):
    """
    Extract the center, bottom, head, shoulder, hip or ankle point of a set of keypoints
    Input --> list or torch/numpy tensor [(m, 3, 17) or (3, 17)]
    Output --> torch.tensor [(m, 2)]
    """
    if isinstance(keypoints, (list, np.ndarray)):
        keypoints = torch.tensor(keypoints)
    if len(keypoints.size()) == 2:  # add batch dim
        keypoints = keypoints.unsqueeze(0)
    assert len(keypoints.size()) == 3 and keypoints.size()[1] == 3, "tensor dimensions not recognized"
    assert mode in ['center', 'bottom', 'head', 'shoulder', 'hip', 'ankle']

    kps_in = keypoints[:, 0:2, :]  # (m, 2, 17)
    if mode == 'center':
        kps_max, _ = kps_in.max(2)  # returns value, indices
        kps_min, _ = kps_in.min(2)
        kps_out = (kps_max - kps_min) / 2 + kps_min  # (m, 2) as keepdims is False

    elif mode == 'bottom':  # bottom center for kitti evaluation
        kps_max, _ = kps_in.max(2)
        kps_min, _ = kps_in.min(2)
        kps_out_x = (kps_max[:, 0:1] - kps_min[:, 0:1]) / 2 + kps_min[:, 0:1]
        kps_out_y = kps_max[:, 1:2]
        kps_out = torch.cat((kps_out_x, kps_out_y), -1)

    elif mode == 'head':
        kps_out = kps_in[:, :, 0:5].mean(2)

    elif mode == 'shoulder':
        kps_out = kps_in[:, :, 5:7].mean(2)

    elif mode == 'hip':
        kps_out = kps_in[:, :, 11:13].mean(2)

    elif mode == 'ankle':
        kps_out = kps_in[:, :, 15:17].mean(2)

    return kps_out  # (m, 2)
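

# Illustrative usage sketch (the values below are assumptions, chosen only to show the
# expected shapes and conventions of the helpers above):
#
#   kk = [[720., 0., 960.], [0., 720., 540.], [0., 0., 1.]]   # hypothetical pinhole intrinsics
#   pixel_to_camera([[960., 540.]], kk, 10.)  # ~ tensor([[0., 0., 10.]]): the principal point
#                                             # maps to a point straight ahead at depth 10 m
#   kps = torch.rand(2, 3, 17) * 100          # two random COCO-style poses (u, v, confidence)
#   get_keypoints(kps, mode='center')         # -> tensor of shape (2, 2)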


def transform_kp(kps, tr_mode):
    """Apply different transformations to the keypoints based on the tr_mode"""
    assert tr_mode in ("None", "singularity", "upper", "lower", "horizontal", "vertical", "lateral",
                       'shoulder', 'knee', 'upside', 'falling', 'random')

    uu_c, vv_c = get_keypoints(kps, mode='center')[0]  # get_keypoints returns (1, 2); take the single row

    if tr_mode == "None":
        return kps

    if tr_mode == "singularity":  # collapse every joint onto the center point
        uus = [uu_c for uu in kps[0]]
        vvs = [vv_c for vv in kps[1]]

    elif tr_mode == "vertical":
        uus = [uu_c for uu in kps[0]]
        vvs = kps[1]

    elif tr_mode == 'horizontal':
        uus = kps[0]
        vvs = [vv_c for vv in kps[1]]

    elif tr_mode == 'shoulder':  # freeze every joint below the shoulders at the shoulder height
        uus = kps[0]
        vvs = kps[1][:7] + [kps[1][6] for vv in kps[1][7:]]

    elif tr_mode == 'knee':  # freeze every joint above the knees at the knee height
        uus = kps[0]
        vvs = [kps[1][14] for vv in kps[1][:13]] + kps[1][13:]

    elif tr_mode == 'upside':  # shift the whole pose up by 300 pixels
        uus = kps[0]
        vvs = [kp - 300 for kp in kps[1]]

    elif tr_mode == 'falling':  # swap the u and v axes around the ankle keypoint
        uus = [kps[0][16] - kp + kps[1][16] for kp in kps[1]]
        vvs = [kps[1][16] - kp + kps[0][16] for kp in kps[0]]

    elif tr_mode == 'random':  # scatter the joints uniformly inside the original bounding box
        uu_min = min(kps[0])
        uu_max = max(kps[0])
        vv_min = min(kps[1])
        vv_max = max(kps[1])
        np.random.seed(6)
        uus = np.random.uniform(uu_min, uu_max, len(kps[0])).tolist()
        vvs = np.random.uniform(vv_min, vv_max, len(kps[1])).tolist()

    return [uus, vvs, kps[2], []]


def xyz_from_distance(distances, xy_centers):
    """
    From distances and normalized image coordinates (z=1), extract the real-world position xyz
    distances --> tensor (m, 1) or (m) or float
    xy_centers --> tensor (m, 3) or (3)
    """
    if isinstance(distances, float):
        distances = torch.tensor(distances).unsqueeze(0)
    if len(distances.size()) == 1:
        distances = distances.unsqueeze(1)
    if len(xy_centers.size()) == 1:
        xy_centers = xy_centers.unsqueeze(0)

    assert xy_centers.size()[-1] == 3 and distances.size()[-1] == 1, "Size of tensor not recognized"
    return xy_centers * distances / torch.sqrt(1 + xy_centers[:, 0:1].pow(2) + xy_centers[:, 1:2].pow(2))


def open_image(path_image):
    with open(path_image, 'rb') as f:
        pil_image = Image.open(f).convert('RGB')
        return pil_image


def correct_angle(yaw, xyz):
    """
    Correct the angle from egocentric (global / rotation_y) to allocentric
    (camera perspective / observation angle), and wrap it to -pi < angle < pi
    """
    correction = math.atan2(xyz[0], xyz[2])
    yaw = yaw - correction
    if yaw > np.pi:
        yaw -= 2 * np.pi
    elif yaw < -np.pi:
        yaw += 2 * np.pi
    assert -2 * np.pi <= yaw <= 2 * np.pi
    return math.sin(yaw), math.cos(yaw), yaw


def back_correct_angles(yaws, xyz):
    """Invert correct_angle: go back from allocentric to egocentric yaws, wrapped to (-pi, pi)"""
    corrections = torch.atan2(xyz[:, 0], xyz[:, 2])
    yaws = yaws + corrections.view(-1, 1)
    mask_up = yaws > math.pi
    yaws[mask_up] -= 2 * math.pi
    mask_down = yaws < -math.pi
    yaws[mask_down] += 2 * math.pi
    assert torch.all(yaws < math.pi) & torch.all(yaws > -math.pi)
    return yaws


def to_spherical(xyz):
    """Convert from cartesian to spherical coordinates"""
    xyz = np.array(xyz)
    r = np.linalg.norm(xyz)
    theta = math.atan2(xyz[2], xyz[0])
    assert 0 <= theta < math.pi  # 0 when positive x and no z
    psi = math.acos(xyz[1] / r)
    assert 0 <= psi <= math.pi
    return [r, theta, psi]


def to_cartesian(rtp, mode=None):
    """Convert from spherical to cartesian coordinates"""
    if isinstance(rtp, torch.Tensor):
        if mode in ('x', 'y'):
            r = rtp[:, 2]
            t = rtp[:, 0]
            p = rtp[:, 1]
            if mode == 'x':
                x = r * torch.sin(p) * torch.cos(t)
                return x.view(-1, 1)
            if mode == 'y':
                y = r * torch.cos(p)
                return y.view(-1, 1)
        xyz = rtp.clone()
        xyz[:, 0] = rtp[:, 0] * torch.sin(rtp[:, 2]) * torch.cos(rtp[:, 1])
        xyz[:, 1] = rtp[:, 0] * torch.cos(rtp[:, 2])
        xyz[:, 2] = rtp[:, 0] * torch.sin(rtp[:, 2]) * torch.sin(rtp[:, 1])
        return xyz

    x = rtp[0] * math.sin(rtp[2]) * math.cos(rtp[1])
    y = rtp[0] * math.cos(rtp[2])
    z = rtp[0] * math.sin(rtp[2]) * math.sin(rtp[1])
    return [x, y, z]
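

# A minimal smoke test, illustrative only: the inputs below are assumed values chosen to
# exercise the angle correction, the spherical round trip and the distance-to-xyz helper.
if __name__ == '__main__':
    # Egocentric yaw of 0.5 rad for a point 2 m to the right, 1 m down and 10 m ahead of the camera
    sin_a, cos_a, alpha = correct_angle(0.5, [2., 1., 10.])
    assert -math.pi < alpha < math.pi

    # Cartesian -> spherical -> cartesian should give back the original point
    rtp = to_spherical([2., 1., 10.])
    assert np.allclose(to_cartesian(rtp), [2., 1., 10.])

    # Recover a 3D position from a distance and a normalized image ray (z = 1)
    kk = [[720., 0., 960.], [0., 720., 540.], [0., 0., 1.]]  # hypothetical pinhole intrinsics
    xy_center = pixel_to_camera([[960., 540.]], kk, 1)[0]
    print(xyz_from_distance(10.0, xy_center))  # ~ tensor([[0., 0., 10.]])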