# monoloco/monstereo/utils/camera.py

import math
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image


def pixel_to_camera(uv_tensor, kk, z_met):
    """
    Convert a tensor of pixel coordinates to absolute camera coordinates
    It accepts lists or torch/numpy tensors of shape (m, 2) or (m, x, 2),
    where x is the number of keypoints
    """
if isinstance(uv_tensor, (list, np.ndarray)):
uv_tensor = torch.tensor(uv_tensor)
if isinstance(kk, list):
kk = torch.tensor(kk)
if uv_tensor.size()[-1] != 2:
uv_tensor = uv_tensor.permute(0, 2, 1) # permute to have 2 as last dim to be padded
assert uv_tensor.size()[-1] == 2, "Tensor size not recognized"
    uv_padded = F.pad(uv_tensor, pad=(0, 1), mode="constant", value=1)  # pad the last dim with 1 (homogeneous coordinates)
kk_1 = torch.inverse(kk)
xyz_met_norm = torch.matmul(uv_padded, kk_1.t()) # More general than torch.mm
xyz_met = xyz_met_norm * z_met
return xyz_met
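
# Usage sketch (illustrative, not part of the original file; the pinhole
# intrinsics below are made-up values):
#   kk = [[720., 0., 640.], [0., 720., 360.], [0., 0., 1.]]
#   pixel_to_camera([[640., 360.]], kk, z_met=10.)  # -> tensor([[0., 0., 10.]])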


def project_to_pixels(xyz, kk):
"""Project a single point in space into the image"""
xx, yy, zz = np.dot(kk, xyz)
uu = round(xx / zz)
vv = round(yy / zz)
return [uu, vv]
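
# Usage sketch (illustrative; same made-up intrinsics as above):
#   kk = [[720., 0., 640.], [0., 720., 360.], [0., 0., 1.]]
#   project_to_pixels([2., 1., 10.], kk)  # -> [784, 432]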


def project_3d(box_obj, kk):
"""
Project a 3D bounding box into the image plane using the central corners
"""
box_2d = []
# Obtain the 3d points of the box
xc, yc, zc = box_obj.center
    ww, _, hh = box_obj.wlh
# Points corresponding to a box at the z of the center
x1 = xc - ww/2
y1 = yc - hh/2 # Y axis directed below
x2 = xc + ww/2
y2 = yc + hh/2
xyz1 = np.array([x1, y1, zc])
xyz2 = np.array([x2, y2, zc])
corners_3d = np.array([xyz1, xyz2])
# Project them and convert into pixel coordinates
for xyz in corners_3d:
xx, yy, zz = np.dot(kk, xyz)
uu = xx / zz
vv = yy / zz
box_2d.append(uu)
box_2d.append(vv)
return box_2d
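
# Usage sketch (illustrative): `box_obj` is assumed to expose nuScenes-style
# `center` and `wlh` attributes, mocked here with a SimpleNamespace:
#   from types import SimpleNamespace
#   box = SimpleNamespace(center=(0., 1., 10.), wlh=(0.6, 0.5, 1.8))
#   kk = [[720., 0., 640.], [0., 720., 360.], [0., 0., 1.]]
#   project_3d(box, kk)  # -> approx. [618.4, 367.2, 661.6, 496.8]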


def get_keypoints(keypoints, mode):
    """
    Extract the center, bottom, head, shoulder, hip or ankle point from a set of keypoints
    Input --> list or torch/numpy tensor [(m, 3, 17) or (3, 17)]
    Output --> torch.tensor [(m, 2)]
    """
if isinstance(keypoints, (list, np.ndarray)):
keypoints = torch.tensor(keypoints)
if len(keypoints.size()) == 2: # add batch dim
keypoints = keypoints.unsqueeze(0)
assert len(keypoints.size()) == 3 and keypoints.size()[1] == 3, "tensor dimensions not recognized"
assert mode in ['center', 'bottom', 'head', 'shoulder', 'hip', 'ankle']
kps_in = keypoints[:, 0:2, :] # (m, 2, 17)
if mode == 'center':
kps_max, _ = kps_in.max(2) # returns value, indices
kps_min, _ = kps_in.min(2)
        kps_out = (kps_max - kps_min) / 2 + kps_min  # (m, 2) as keepdim is False
elif mode == 'bottom': # bottom center for kitti evaluation
kps_max, _ = kps_in.max(2)
kps_min, _ = kps_in.min(2)
kps_out_x = (kps_max[:, 0:1] - kps_min[:, 0:1]) / 2 + kps_min[:, 0:1]
kps_out_y = kps_max[:, 1:2]
kps_out = torch.cat((kps_out_x, kps_out_y), -1)
elif mode == 'head':
kps_out = kps_in[:, :, 0:5].mean(2)
elif mode == 'shoulder':
kps_out = kps_in[:, :, 5:7].mean(2)
elif mode == 'hip':
kps_out = kps_in[:, :, 11:13].mean(2)
elif mode == 'ankle':
kps_out = kps_in[:, :, 15:17].mean(2)
return kps_out # (m, 2)
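
# Usage sketch (illustrative): keypoints are assumed to follow the COCO
# 17-point order (0-4 head, 5-6 shoulders, 11-12 hips, 15-16 ankles):
#   kps = torch.rand(2, 3, 17)           # batch of 2 poses, rows = (u, v, conf)
#   get_keypoints(kps, mode='shoulder')  # -> tensor of shape (2, 2)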


def transform_kp(kps, tr_mode):
    """Apply different transformations to the keypoints based on the tr_mode"""
    assert tr_mode in ("None", "singularity", "upper", "lower", "horizontal", "vertical", "lateral",
                       'shoulder', 'knee', 'upside', 'falling', 'random')
    # note: 'upper', 'lower' and 'lateral' pass the assert but are not handled below
    uu_c, vv_c = get_keypoints(kps, mode='center')[0].tolist()  # center of the pose as two scalars
if tr_mode == "None":
return kps
if tr_mode == "singularity":
uus = [uu_c for uu in kps[0]]
vvs = [vv_c for vv in kps[1]]
elif tr_mode == "vertical":
uus = [uu_c for uu in kps[0]]
vvs = kps[1]
elif tr_mode == 'horizontal':
uus = kps[0]
vvs = [vv_c for vv in kps[1]]
elif tr_mode == 'shoulder':
uus = kps[0]
vvs = kps[1][:7] + [kps[1][6] for vv in kps[1][7:]]
elif tr_mode == 'knee':
uus = kps[0]
vvs = [kps[1][14] for vv in kps[1][:13]] + kps[1][13:]
    elif tr_mode == 'upside':
        uus = kps[0]
        vvs = [kp - 300 for kp in kps[1]]  # shift the whole pose 300 pixels up
elif tr_mode == 'falling':
uus = [kps[0][16] - kp + kps[1][16] for kp in kps[1]]
vvs = [kps[1][16] - kp + kps[0][16] for kp in kps[0]]
elif tr_mode == 'random':
uu_min = min(kps[0])
uu_max = max(kps[0])
vv_min = min(kps[1])
vv_max = max(kps[1])
np.random.seed(6)
uus = np.random.uniform(uu_min, uu_max, len(kps[0])).tolist()
vvs = np.random.uniform(vv_min, vv_max, len(kps[1])).tolist()
return [uus, vvs, kps[2], []]
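
# Usage sketch (illustrative): `kps` is a single pose as three lists of 17
# values each (u, v, confidence):
#   kps = [list(range(17)), list(range(17)), [1.0] * 17]
#   transform_kp(kps, 'horizontal')  # all v collapsed onto the vertical center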


def xyz_from_distance(distances, xy_centers):
    """
    From distances and normalized image coordinates (z=1), extract the real-world position xyz
    distances --> tensor (m, 1) or (m) or float
    xy_centers --> tensor (m, 3) or (3)
    """
if isinstance(distances, float):
distances = torch.tensor(distances).unsqueeze(0)
if len(distances.size()) == 1:
distances = distances.unsqueeze(1)
if len(xy_centers.size()) == 1:
xy_centers = xy_centers.unsqueeze(0)
assert xy_centers.size()[-1] == 3 and distances.size()[-1] == 1, "Size of tensor not recognized"
return xy_centers * distances / torch.sqrt(1 + xy_centers[:, 0:1].pow(2) + xy_centers[:, 1:2].pow(2))
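
# Usage sketch (illustrative): the returned point lies along the viewing ray
# of the normalized center and has Euclidean norm equal to the distance:
#   centers = torch.tensor([[0.6, 0.0, 1.0]])        # normalized coords, z = 1
#   xyz_from_distance(torch.tensor([10.]), centers)  # -> approx. [[5.145, 0., 8.575]], norm 10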


def open_image(path_image):
with open(path_image, 'rb') as f:
pil_image = Image.open(f).convert('RGB')
return pil_image


def correct_angle(yaw, xyz):
    """
    Convert the egocentric angle (global rotation_y) to the allocentric one
    (camera perspective / observation angle) and normalize it to -pi < angle < pi
    """
correction = math.atan2(xyz[0], xyz[2])
yaw = yaw - correction
if yaw > np.pi:
yaw -= 2 * np.pi
elif yaw < -np.pi:
yaw += 2 * np.pi
assert -2 * np.pi <= yaw <= 2 * np.pi
return math.sin(yaw), math.cos(yaw), yaw
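
# Usage sketch (illustrative): an object with rotation_y = 0 observed at
# 45 degrees to the right of the camera:
#   correct_angle(0., [3., 0., 3.])  # -> approx. (-0.707, 0.707, -pi/4)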


def back_correct_angles(yaws, xyz):
    """Convert allocentric angles back to egocentric ones (inverse of correct_angle)"""
corrections = torch.atan2(xyz[:, 0], xyz[:, 2])
yaws = yaws + corrections.view(-1, 1)
mask_up = yaws > math.pi
yaws[mask_up] -= 2 * math.pi
mask_down = yaws < -math.pi
yaws[mask_down] += 2 * math.pi
    assert torch.all(yaws < math.pi) and torch.all(yaws > -math.pi)
return yaws
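
# Usage sketch (illustrative): undoing the correction of the example above:
#   back_correct_angles(torch.tensor([[-math.pi / 4]]), torch.tensor([[3., 0., 3.]]))
#   # -> tensor([[0.]])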


def to_spherical(xyz):
    """Convert from Cartesian to spherical coordinates"""
xyz = np.array(xyz)
r = np.linalg.norm(xyz)
theta = math.atan2(xyz[2], xyz[0])
    assert 0 <= theta < math.pi  # 0 when x is positive and z is zero
psi = math.acos(xyz[1] / r)
assert 0 <= psi <= math.pi
return [r, theta, psi]


def to_cartesian(rtp, mode=None):
    """Convert from spherical to Cartesian coordinates"""
    if isinstance(rtp, torch.Tensor):
        if mode in ('x', 'y'):
            # in this branch the input layout is (theta, psi, r),
            # unlike the (r, theta, psi) layout used below
            r = rtp[:, 2]
            t = rtp[:, 0]
            p = rtp[:, 1]
if mode == 'x':
x = r * torch.sin(p) * torch.cos(t)
return x.view(-1, 1)
if mode == 'y':
y = r * torch.cos(p)
return y.view(-1, 1)
xyz = rtp.clone()
xyz[:, 0] = rtp[:, 0] * torch.sin(rtp[:, 2]) * torch.cos(rtp[:, 1])
xyz[:, 1] = rtp[:, 0] * torch.cos(rtp[:, 2])
xyz[:, 2] = rtp[:, 0] * torch.sin(rtp[:, 2]) * torch.sin(rtp[:, 1])
return xyz
x = rtp[0] * math.sin(rtp[2]) * math.cos(rtp[1])
y = rtp[0] * math.cos(rtp[2])
z = rtp[0] * math.sin(rtp[2]) * math.sin(rtp[1])
    return [x, y, z]
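
# Usage sketch (illustrative): a round trip between the two representations:
#   rtp = to_spherical([3., 0., 4.])  # -> [5.0, atan2(4, 3), pi / 2]
#   to_cartesian(rtp)                 # -> approx. [3.0, 0.0, 4.0]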