import math

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image


def pixel_to_camera(uv_tensor, kk, z_met):
    """
    Convert a tensor in pixel coordinates to absolute camera coordinates
    It accepts lists or torch/numpy tensors of (m, 2) or (m, x, 2)
    where x is the number of keypoints
    """
    if isinstance(uv_tensor, (list, np.ndarray)):
        uv_tensor = torch.tensor(uv_tensor)
    if isinstance(kk, list):
        kk = torch.tensor(kk)
    if uv_tensor.size()[-1] != 2:
        uv_tensor = uv_tensor.permute(0, 2, 1)  # permute to have 2 as last dim to be padded
    assert uv_tensor.size()[-1] == 2, "Tensor size not recognized"
    uv_padded = F.pad(uv_tensor, pad=(0, 1), mode="constant", value=1)  # pad only the last dim with value 1

    kk_1 = torch.inverse(kk)
    xyz_met_norm = torch.matmul(uv_padded, kk_1.t())  # more general than torch.mm
    xyz_met = xyz_met_norm * z_met
    return xyz_met

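# Usage sketch (the KITTI-like intrinsic matrix below is hypothetical, not from this repo):
# >>> kk = [[721.5, 0., 609.6], [0., 721.5, 172.9], [0., 0., 1.]]
# >>> pixel_to_camera([[609.6, 172.9]], kk, 10.)  # the principal point
# tensor([[0., 0., 10.]])  # approximately, up to float error
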
def project_to_pixels(xyz, kk):
    """Project a single point in space into the image"""
    xx, yy, zz = np.dot(kk, xyz)
    uu = round(xx / zz)
    vv = round(yy / zz)
    return [uu, vv]

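# Round-trip sketch with pixel_to_camera (same hypothetical intrinsics as above):
# >>> kk = np.array([[721.5, 0., 609.6], [0., 721.5, 172.9], [0., 0., 1.]])
# >>> project_to_pixels(np.array([0., 0., 10.]), kk)
# [610, 173]  # the (rounded) principal point
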
def project_3d(box_obj, kk):
    """
    Project a 3D bounding box into the image plane using the central corners
    """
    box_2d = []

    # Obtain the 3d points of the box
    xc, yc, zc = box_obj.center
    ww, _, hh = box_obj.wlh

    # Points corresponding to a box at the z of the center
    x1 = xc - ww / 2
    y1 = yc - hh / 2  # Y axis directed below
    x2 = xc + ww / 2
    y2 = yc + hh / 2
    xyz1 = np.array([x1, y1, zc])
    xyz2 = np.array([x2, y2, zc])
    corners_3d = np.array([xyz1, xyz2])

    # Project them and convert into pixel coordinates
    for xyz in corners_3d:
        xx, yy, zz = np.dot(kk, xyz)
        uu = xx / zz
        vv = yy / zz
        box_2d.append(uu)
        box_2d.append(vv)

    return box_2d

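# Sketch with a stand-in for the expected box interface: box_obj only needs
# .center and .wlh (a nuScenes-devkit-style Box is assumed; the namedtuple
# below is hypothetical):
# >>> from collections import namedtuple
# >>> Box = namedtuple('Box', ['center', 'wlh'])
# >>> box = Box(center=(0., 1., 10.), wlh=(0.6, 0.8, 1.8))
# >>> project_3d(box, np.array([[721.5, 0., 609.6], [0., 721.5, 172.9], [0., 0., 1.]]))
# [uu1, vv1, uu2, vv2] of the two projected central corners
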
def get_keypoints(keypoints, mode):
    """
    Extract the center, bottom, head, shoulder, hip or ankle point from a set of keypoints
    Input --> list or torch/numpy tensor [(m, 3, 17) or (3, 17)]
    Output --> torch.tensor [(m, 2)]
    """
    if isinstance(keypoints, (list, np.ndarray)):
        keypoints = torch.tensor(keypoints)
    if len(keypoints.size()) == 2:  # add batch dim
        keypoints = keypoints.unsqueeze(0)
    assert len(keypoints.size()) == 3 and keypoints.size()[1] == 3, "tensor dimensions not recognized"
    assert mode in ['center', 'bottom', 'head', 'shoulder', 'hip', 'ankle']

    kps_in = keypoints[:, 0:2, :]  # (m, 2, 17)
    if mode == 'center':
        kps_max, _ = kps_in.max(2)  # returns value, indices
        kps_min, _ = kps_in.min(2)
        kps_out = (kps_max - kps_min) / 2 + kps_min  # (m, 2) as keepdims is False

    elif mode == 'bottom':  # bottom center for kitti evaluation
        kps_max, _ = kps_in.max(2)
        kps_min, _ = kps_in.min(2)
        kps_out_x = (kps_max[:, 0:1] - kps_min[:, 0:1]) / 2 + kps_min[:, 0:1]
        kps_out_y = kps_max[:, 1:2]
        kps_out = torch.cat((kps_out_x, kps_out_y), -1)

    elif mode == 'head':
        kps_out = kps_in[:, :, 0:5].mean(2)

    elif mode == 'shoulder':
        kps_out = kps_in[:, :, 5:7].mean(2)

    elif mode == 'hip':
        kps_out = kps_in[:, :, 11:13].mean(2)

    elif mode == 'ankle':
        kps_out = kps_in[:, :, 15:17].mean(2)

    return kps_out  # (m, 2)

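# Usage sketch with synthetic COCO-style keypoints (rows: x, y, confidence):
# >>> kps = [[float(i) for i in range(17)], [float(i) for i in range(17)], [1.] * 17]
# >>> get_keypoints(kps, mode='center')
# tensor([[8., 8.]])  # midpoint of the keypoints' bounding box
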
def transform_kp(kps, tr_mode):
    """Apply different transformations to the keypoints based on the tr_mode"""

    assert tr_mode in ("None", "singularity", "upper", "lower", "horizontal", "vertical", "lateral",
                       'shoulder', 'knee', 'upside', 'falling', 'random')

    uu_c, vv_c = get_keypoints(kps, mode='center').squeeze(0).tolist()  # drop the batch dim before unpacking

    if tr_mode == "None":
        return kps

    uus, vvs = kps[0], kps[1]  # fallback: modes without a dedicated branch keep the original coordinates

    if tr_mode == "singularity":
        uus = [uu_c for _ in kps[0]]
        vvs = [vv_c for _ in kps[1]]

    elif tr_mode == "vertical":
        uus = [uu_c for _ in kps[0]]
        vvs = kps[1]

    elif tr_mode == 'horizontal':
        uus = kps[0]
        vvs = [vv_c for _ in kps[1]]

    elif tr_mode == 'shoulder':
        uus = kps[0]
        vvs = kps[1][:7] + [kps[1][6] for _ in kps[1][7:]]

    elif tr_mode == 'knee':
        uus = kps[0]
        vvs = [kps[1][14] for _ in kps[1][:13]] + kps[1][13:]

    elif tr_mode == 'upside':  # renamed from 'up' to match the assert above
        uus = kps[0]
        vvs = [kp - 300 for kp in kps[1]]

    elif tr_mode == 'falling':
        uus = [kps[0][16] - kp + kps[1][16] for kp in kps[1]]
        vvs = [kps[1][16] - kp + kps[0][16] for kp in kps[0]]

    elif tr_mode == 'random':
        uu_min = min(kps[0])
        uu_max = max(kps[0])
        vv_min = min(kps[1])
        vv_max = max(kps[1])
        np.random.seed(6)
        uus = np.random.uniform(uu_min, uu_max, len(kps[0])).tolist()
        vvs = np.random.uniform(vv_min, vv_max, len(kps[1])).tolist()

    return [uus, vvs, kps[2], []]

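# Usage sketch, reusing the synthetic kps from the get_keypoints sketch above
# (kps in the [uus, vvs, ccs] list format assumed throughout this module):
# >>> kps_tr = transform_kp(kps, tr_mode='vertical')
# >>> len(set(kps_tr[0])) == 1  # all u-coordinates collapsed onto the center
# True
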
def xyz_from_distance(distances, xy_centers):
    """
    From distances and normalized image coordinates (z=1), extract the real-world position xyz
    distances --> tensor (m, 1) or (m) or float
    xy_centers --> tensor (m, 3) or (3)
    """
    if isinstance(distances, float):
        distances = torch.tensor(distances).unsqueeze(0)
    if len(distances.size()) == 1:
        distances = distances.unsqueeze(1)
    if len(xy_centers.size()) == 1:
        xy_centers = xy_centers.unsqueeze(0)

    assert xy_centers.size()[-1] == 3 and distances.size()[-1] == 1, "Size of tensor not recognized"

    return xy_centers * distances / torch.sqrt(1 + xy_centers[:, 0:1].pow(2) + xy_centers[:, 1:2].pow(2))

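# Sketch: a point on the optical axis (x = y = 0, z = 1) at 10 m:
# >>> xyz_from_distance(10., torch.tensor([0., 0., 1.]))
# tensor([[ 0.,  0., 10.]])
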
def open_image(path_image):
    with open(path_image, 'rb') as f:
        pil_image = Image.open(f).convert('RGB')
        return pil_image

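# Usage sketch (hypothetical path):
# >>> img = open_image('docs/example.png')  # returns an RGB PIL.Image
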
def correct_angle(yaw, xyz):
    """
    Correct the angle from the egocentric (global / rotation_y) frame
    to the allocentric one (camera perspective / observation angle)
    and wrap it so that -pi < angle < pi
    """
    correction = math.atan2(xyz[0], xyz[2])
    yaw = yaw - correction
    if yaw > np.pi:
        yaw -= 2 * np.pi
    elif yaw < -np.pi:
        yaw += 2 * np.pi
    assert -2 * np.pi <= yaw <= 2 * np.pi
    return math.sin(yaw), math.cos(yaw), yaw

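# Sketch: an object 45 degrees to the right (x = z) is corrected by atan2(x, z) = pi/4:
# >>> correct_angle(math.pi / 2, [10., 0., 10.])
# (0.7071..., 0.7071..., 0.7853...)  # sin, cos and yaw of pi/2 - pi/4 = pi/4
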
def back_correct_angles(yaws, xyz):
    """Apply the inverse of the correct_angle correction to a batch of yaws, keeping -pi < angle < pi"""
    corrections = torch.atan2(xyz[:, 0], xyz[:, 2])
    yaws = yaws + corrections.view(-1, 1)
    mask_up = yaws > math.pi
    yaws[mask_up] -= 2 * math.pi
    mask_down = yaws < -math.pi
    yaws[mask_down] += 2 * math.pi
    assert torch.all(yaws < math.pi) & torch.all(yaws > -math.pi)
    return yaws

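# Sketch: undo the pi/4 correction from the correct_angle example above:
# >>> back_correct_angles(torch.tensor([[math.pi / 4]]), torch.tensor([[10., 0., 10.]]))
# tensor([[1.5708]])  # back to ~pi/2
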
def to_spherical(xyz):
    """Convert from Cartesian to spherical coordinates"""
    xyz = np.array(xyz)
    r = np.linalg.norm(xyz)
    theta = math.atan2(xyz[2], xyz[0])

    assert 0 <= theta < math.pi  # 0 when positive x and no z; requires the point to lie in front of the camera
    psi = math.acos(xyz[1] / r)
    assert 0 <= psi <= math.pi
    return [r, theta, psi]

def to_cartesian(rtp, mode=None):
    """Convert from spherical to Cartesian coordinates"""

    if isinstance(rtp, torch.Tensor):
        if mode in ('x', 'y'):
            # in this mode the columns are read in the order (theta, psi, r)
            r = rtp[:, 2]
            t = rtp[:, 0]
            p = rtp[:, 1]
            if mode == 'x':
                x = r * torch.sin(p) * torch.cos(t)
                return x.view(-1, 1)

            if mode == 'y':
                y = r * torch.cos(p)
                return y.view(-1, 1)

        xyz = rtp.clone()
        xyz[:, 0] = rtp[:, 0] * torch.sin(rtp[:, 2]) * torch.cos(rtp[:, 1])
        xyz[:, 1] = rtp[:, 0] * torch.cos(rtp[:, 2])
        xyz[:, 2] = rtp[:, 0] * torch.sin(rtp[:, 2]) * torch.sin(rtp[:, 1])
        return xyz

    x = rtp[0] * math.sin(rtp[2]) * math.cos(rtp[1])
    y = rtp[0] * math.cos(rtp[2])
    z = rtp[0] * math.sin(rtp[2]) * math.sin(rtp[1])
    return [x, y, z]

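# Round-trip sketch: to_cartesian inverts to_spherical for points in front of
# the camera (z > 0, so the assertions in to_spherical hold):
# >>> rtp = to_spherical([3., 4., 12.])
# >>> to_cartesian(rtp)
# [3.0, 4.0, 12.0]  # approximately, up to float error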