refactor predict (#1)
* add stereo arg (#1)
* stereo skeleton
* skeleton stereo (#2)
* skeleton stereo (#3)
* refactor for stereo
* stereo running
* modify stereo parameters
* remove warnings as errors
* add p2_right
* add factory file
* add p2_right
* add scaling factor to intrinsic matrix
* Refactor predict
* refactor predict 2
* temp
* add uppercase constants
* add uppercase constants
* working predict
* add todo
* Add person_sitting as option to uncomment
* turn off stereo
* remove stereo
* pylint corrections
* add break point
* temp
* fix small bug
* Add compatibility for pifpaf and monoloco networks
* pylint fix
Parent: d941a5bdc7
Commit: 86438189a7
BIN  docs/002282.png.combined.png (new binary file, not shown). After: 694 KiB
@@ -1,16 +1,22 @@
"""Run monoloco over all the pifpaf joints of KITTI images
and extract and save the annotations in txt files"""

import torch
import math
import numpy as np
import os
import glob
import json
import logging

import numpy as np
import torch

from models.architectures import LinearModel
from utils.misc import laplace_sampling
from utils.kitti import eval_geometric, get_calibration
from utils.normalize import unnormalize_bi
from utils.pifpaf import get_input_data, preprocess_pif
from utils.camera import get_depth_from_distance


class RunKitti:
@@ -23,22 +29,17 @@ class RunKitti:
    average_y = 0.48
    n_samples = 100

    def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout, stereo=False):
    def __init__(self, model, dir_ann, dropout, hidden_size, n_stage, n_dropout):

        # Set directories
        assert dir_ann, "Annotations folder is required"
        self.dir_ann = dir_ann
        self.n_dropout = n_dropout

        list_ann = glob.glob(os.path.join(dir_ann, '*.json'))
        self.dir_kk = os.path.join('data', 'kitti', 'calib')
        self.dir_out = os.path.join('data', 'kitti', 'monoloco')
        if not os.path.exists(self.dir_out):
            os.makedirs(self.dir_out)
            print("Created output directory for txt files")

        self.list_basename = [os.path.basename(x).split('.')[0] for x in list_ann]
        assert self.list_basename, " Missing json annotations file to create txt files for KITTI datasets"
        self.list_basename = factory_basename(dir_ann)

        # Load the model
        input_size = 17 * 2
@@ -54,86 +55,131 @@ class RunKitti:

        # Run inference
        for basename in self.list_basename:
            path_calib = os.path.join(self.dir_kk, basename + '.txt')
            kk, tt = get_calibration(path_calib)

            path_ann = os.path.join(self.dir_ann, basename + '.png.pifpaf.json')
            with open(path_ann, 'r') as f:
                annotations = json.load(f)
            path_calib = os.path.join(self.dir_kk, basename + '.txt')

            annotations, kk, tt, _ = factory_file(path_calib, self.dir_ann, basename)

            boxes, keypoints = preprocess_pif(annotations)
            (inputs, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = get_input_data(boxes, keypoints, kk)

            dds_geom, xy_centers = eval_geometric(uv_kps, uv_centers, uv_shoulders, kk, average_y=0.48)

            # Update counting
            self.cnt_ann += len(boxes)

            inputs = torch.from_numpy(np.array(inputs)).float().to(self.device)

            if len(inputs) == 0:
            if not inputs:
                self.cnt_no_file += 1

            else:
                self.cnt_file += 1

                if self.n_dropout > 0:
                    total_outputs = torch.empty((0, len(uv_boxes))).to(self.device)
                    self.model.dropout.training = True
                    for ii in range(self.n_dropout):
                        outputs = self.model(inputs)
                        outputs = unnormalize_bi(outputs)
                        samples = laplace_sampling(outputs, self.n_samples)
                        total_outputs = torch.cat((total_outputs, samples), 0)
                    varss = total_outputs.std(0)
                # Run the model
                inputs = torch.from_numpy(np.array(inputs)).float().to(self.device)
                if self.n_dropout > 0:
                    total_outputs = torch.empty((0, len(uv_boxes))).to(self.device)
                    self.model.dropout.training = True
                    for _ in range(self.n_dropout):
                        outputs = self.model(inputs)
                        outputs = unnormalize_bi(outputs)
                        samples = laplace_sampling(outputs, self.n_samples)
                        total_outputs = torch.cat((total_outputs, samples), 0)
                    varss = total_outputs.std(0)

                else:
                    varss = [0]*len(uv_boxes)
                else:
                    varss = [0]*len(uv_boxes)

                # Don't use dropout for the mean prediction and aleatoric uncertainty
                self.model.dropout.training = False
                outputs_net = self.model(inputs)
                outputs = outputs_net.cpu().detach().numpy()
                # Don't use dropout for the mean prediction and aleatoric uncertainty
                self.model.dropout.training = False
                outputs_net = self.model(inputs)
                outputs = outputs_net.cpu().detach().numpy()

                path_txt = os.path.join(self.dir_out, basename + '.txt')
                with open(path_txt, "w+") as ff:
                    for idx in range(outputs.shape[0]):
                        xx_1 = float(xy_centers[idx][0])
                        yy_1 = float(xy_centers[idx][1])
                        xy_kp = xy_kps[idx]
                        dd = float(outputs[idx][0])
                        std_ale = math.exp(float(outputs[idx][1])) * dd
                list_zzs = get_depth_from_distance(outputs, xy_centers)
                all_outputs = [outputs, varss, dds_geom]
                all_inputs = [uv_boxes, xy_centers, xy_kps]
                all_params = [kk, tt]

                        zz = dd / math.sqrt(1 + xx_1**2 + yy_1**2)
                        xx_cam_0 = xx_1*zz + tt[0]  # Still to verify the sign but negligible
                        yy_cam_0 = yy_1*zz + tt[1]
                        zz_cam_0 = zz + tt[2]
                        dd_cam_0 = math.sqrt(xx_cam_0**2 + yy_cam_0**2 + zz_cam_0**2)

                        uv_box = uv_boxes[idx]

                        twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3],
                                                              xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0,
                                                              std_ale, varss[idx], uv_box[4], dds_geom[idx]]]

                        keypoints_str = ["%.5f" % vv for vv in xy_kp]
                        for item in twodecimals:
                            ff.write("%s " % item)
                        for item in keypoints_str:
                            ff.write("%s " % item)
                        ff.write("\n")

                    # Save intrinsic matrix in the last row
                    kk_list = kk.reshape(-1,).tolist()
                    for kk_el in kk_list:
                        ff.write("%f " % kk_el)
                    ff.write("\n")
                # Save the file
                all_outputs.append(list_zzs)
                path_txt = os.path.join(self.dir_out, basename + '.txt')
                save_txts(path_txt, all_inputs, all_outputs, all_params)
                aa = 5

        # Print statistics
        print("Saved in {} txt {} annotations. Not found {} images"
              .format(self.cnt_file, self.cnt_ann, self.cnt_no_file))


def save_txts(path_txt, all_inputs, all_outputs, all_params):

    outputs, varss, dds_geom, zzs = all_outputs[:]
    uv_boxes, xy_centers, xy_kps = all_inputs[:]
    kk, tt = all_params[:]

    with open(path_txt, "w+") as ff:
        for idx in range(outputs.shape[0]):
            xx_1 = float(xy_centers[idx][0])
            yy_1 = float(xy_centers[idx][1])
            xy_kp = xy_kps[idx]
            dd = float(outputs[idx][0])
            std_ale = math.exp(float(outputs[idx][1])) * dd
            zz = zzs[idx]
            xx_cam_0 = xx_1 * zz + tt[0]
            yy_cam_0 = yy_1 * zz + tt[1]
            zz_cam_0 = zz + tt[2]
            dd_cam_0 = math.sqrt(xx_cam_0 ** 2 + yy_cam_0 ** 2 + zz_cam_0 ** 2)

            uv_box = uv_boxes[idx]

            twodecimals = ["%.3f" % vv for vv in [uv_box[0], uv_box[1], uv_box[2], uv_box[3],
                                                  xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0,
                                                  std_ale, varss[idx], uv_box[4], dds_geom[idx]]]

            keypoints_str = ["%.5f" % vv for vv in xy_kp]
            for item in twodecimals:
                ff.write("%s " % item)
            for item in keypoints_str:
                ff.write("%s " % item)
            ff.write("\n")

        # Save intrinsic matrix in the last row
        kk_list = kk.reshape(-1, ).tolist()
        for kk_el in kk_list:
            ff.write("%f " % kk_el)
        ff.write("\n")


def factory_basename(dir_ann):
    """ Return all the basenames in the annotations folder"""

    list_ann = glob.glob(os.path.join(dir_ann, '*.json'))
    list_basename = [os.path.basename(x).split('.')[0] for x in list_ann]
    assert list_basename, " Missing json annotations file to create txt files for KITTI datasets"

    return list_basename


def factory_file(path_calib, dir_ann, basename, ite=0):
    """Choose the annotation and the calibration files. Stereo option with ite = 1"""

    stereo_file = True
    p_left, p_right = get_calibration(path_calib)

    if ite == 0:
        kk, tt = p_left[:]
        path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json')
    else:
        kk, tt = p_right[:]
        path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json')

    try:
        with open(path_ann, 'r') as f:
            annotations = json.load(f)
    except FileNotFoundError:
        annotations = None
        if ite == 1:
            stereo_file = False

    return annotations, kk, tt, stereo_file
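Note (not part of the diff): each detection row written by save_txts follows a fixed layout. Twelve floats come first, the 2D box [x1, y1, x2, y2], the camera-frame coordinates and distance (xx_cam_0, yy_cam_0, zz_cam_0, dd_cam_0), the aleatoric std, the epistemic spread, the box confidence, and the geometric-baseline distance, then the flattened keypoints, with the flattened intrinsic matrix as the last row. A minimal reader sketch under those assumptions (the function name is illustrative):

def read_monoloco_txt(path_txt):
    """Hypothetical parser for the txt layout produced by save_txts above."""
    with open(path_txt, 'r') as ff:
        rows = [line.split() for line in ff if line.strip()]
    kk = [float(el) for el in rows[-1]]  # last row: flattened 3x3 intrinsic matrix
    detections = []
    for row in rows[:-1]:
        vals = [float(el) for el in row]
        detections.append({'box': vals[0:4],         # [x1, y1, x2, y2]
                           'xyz': vals[4:7],         # xx_cam_0, yy_cam_0, zz_cam_0
                           'dd': vals[7],            # predicted distance dd_cam_0
                           'std_ale': vals[8],       # aleatoric uncertainty
                           'var_epi': vals[9],       # epistemic spread (0 without dropout)
                           'conf': vals[10],         # detection confidence (uv_box[4])
                           'dd_geom': vals[11],      # geometric baseline distance
                           'keypoints': vals[12:]})  # flattened keypoints
    return kk, detections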
@@ -64,7 +64,8 @@ class PreprocessKitti:

        # Extract keypoints
        path_txt = os.path.join(self.dir_kk, basename + '.txt')
        kk, tt = get_calibration(path_txt)
        p_left, _ = get_calibration(path_txt)
        kk = p_left[0]

        # Iterate over each line of the gt file and save box location and distances
        boxes_gt, dds_gt, _, _ = parse_ground_truth(path_gt)
@@ -9,7 +9,7 @@ from openpifpaf.network import nets
from openpifpaf import decoder
from features.preprocess_nu import PreprocessNuscenes
from features.preprocess_ki import PreprocessKitti
from predict.predict_2d_3d import predict
from predict.predict import predict
from models.trainer import Trainer
from eval.run_kitti import RunKitti
from eval.geom_baseline import GeomBaseline
src/predict/factory.py (new file, 91 lines)
@@ -0,0 +1,91 @@
import json
import os
from visuals.printer import Printer
from openpifpaf import show

from PIL import Image


def factory_for_gt(image, name=None, path_gt=None):
    """Look for ground-truth annotations file and define calibration matrix based on image size """

    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        print('-' * 120 + "\nMonoloco: Ground-truth file opened\n")
    except FileNotFoundError:
        print('-' * 120 + "\nMonoloco: ground-truth file not found\n")
        dic_names = {}

    try:
        kk = dic_names[name]['K']
        dic_gt = dic_names[name]
        print("Monoloco: matched ground-truth file!\n" + '-' * 120)
    except KeyError:
        dic_gt = None
        x_factor = image.size[0] / 1600
        y_factor = image.size[1] / 900
        pixel_factor = (x_factor + y_factor) / 2
        if image.size[0] / image.size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
        else:
            kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
                  [0, 1266.4 * pixel_factor, 491.5 * y_factor],
                  [0., 0., 1.]]  # nuScenes calibration

        print("Ground-truth annotations for the image not found\n"
              "Using a standard calibration matrix...\n" + '-' * 120)

    return kk, dic_gt


def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=None, kk=None):
    """Output json files or images according to the choice"""

    # Save json file
    if 'pifpaf' in args.networks:

        keypoint_sets, pifpaf_out, scores = pifpaf_outputs[:]

        # Visualizer
        keypoint_painter = show.KeypointPainter(show_box=True)
        skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
                                                markersize=1, linewidth=4)

        if 'json' in args.output_types and keypoint_sets.size > 0:
            with open(output_path + '.pifpaf.json', 'w') as f:
                json.dump(pifpaf_out, f)

        if 'keypoints' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.keypoints.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                keypoint_painter.keypoints(ax, keypoint_sets)

        if 'skeleton' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.skeleton.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)

    if 'monoloco' in args.networks:
        if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):

            epistemic = False
            if args.n_dropout > 0:
                epistemic = True

            printer = Printer(images_outputs[1], output_path, monoloco_outputs, kk, output_types=args.output_types,
                              show=args.show, z_max=args.z_max, epistemic=epistemic)
            printer.print()

        if 'json' in args.output_types:
            with open(os.path.join(args.output_path + '.monoloco.json'), 'w') as ff:
                json.dump(monoloco_outputs, ff)
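A usage sketch for factory_for_gt (assumed call site; paths are illustrative placeholders): it takes the PIL image plus an optional basename and ground-truth path, and returns the calibration matrix together with the matched ground-truth dictionary, or None when there is no match:

from PIL import Image
from predict.factory import factory_for_gt

with open('docs/002282.png', 'rb') as f:  # illustrative image path
    pil_image = Image.open(f).convert('RGB')

# path_gt is a placeholder; a miss falls back to a standard calibration matrix
kk, dic_gt = factory_for_gt(pil_image, name='002282.png', path_gt='data/names.json')
# kk: 3x3 intrinsic matrix; dic_gt carries 'boxes' and 'dds' when the image is matched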
@@ -1,65 +1,51 @@
"""
From a json file output images and json annotations
Monoloco predictor. It receives pifpaf joints and outputs distances
"""

import sys
from collections import defaultdict
import os
import json
import logging
import time

import numpy as np
import torch
from PIL import Image

from models.architectures import LinearModel
from visuals.printer import Printer
from utils.camera import get_depth
from utils.misc import laplace_sampling, get_idx_max
from utils.normalize import unnormalize_bi
from utils.pifpaf import get_input_data


class PredictMonoLoco:
class MonoLoco:

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    output_size = 2
    input_size = 17 * 2
    OUTPUT_SIZE = 2
    INPUT_SIZE = 17 * 2
    LINEAR_SIZE = 256
    IOU_MIN = 0.25
    N_SAMPLES = 100

    def __init__(self, boxes, keypoints, image_path, output_path, args):
        self.boxes = boxes
        self.keypoints = keypoints
        self.image_path = image_path
        self.output_path = output_path
        self.device = args.device
        self.draw_kps = args.draw_kps
        self.z_max = args.z_max
        self.output_types = args.output_types
        self.path_gt = args.path_gt
        self.show = args.show
        self.n_samples = 100
        self.n_dropout = args.n_dropout
    def __init__(self, model, device, n_dropout=0):

        self.device = device
        self.n_dropout = n_dropout
        if self.n_dropout > 0:
            self.epistemic = True
        else:
            self.epistemic = False
        self.iou_min = 0.25

        # load the model parameters
        self.model = LinearModel(input_size=self.input_size, output_size=self.output_size, linear_size=args.hidden_size)
        self.model.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
        self.model = LinearModel(input_size=self.INPUT_SIZE, output_size=self.OUTPUT_SIZE, linear_size=self.LINEAR_SIZE)
        self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))
        self.model.eval()  # Default is train
        self.model.to(self.device)

    def run(self):
        # Extract calibration matrix if ground-truth file is present or use a default one
        cnt = 0
        dic_names, kk = factory_for_gt(self.path_gt, self.image_path)
    def forward(self, boxes, keypoints, kk, dic_gt=None):

        (inputs_norm, xy_kps), (uv_kps, uv_boxes, uv_centers, uv_shoulders) = \
            get_input_data(self.boxes, self.keypoints, kk, left_to_right=True)
            get_input_data(boxes, keypoints, kk, left_to_right=True)

        # Conversion into torch tensor
        if inputs_norm:
@@ -76,7 +62,7 @@ class PredictMonoLoco:
            for _ in range(self.n_dropout):
                outputs = self.model(inputs)
                outputs = unnormalize_bi(outputs)
                samples = laplace_sampling(outputs, self.n_samples)
                samples = laplace_sampling(outputs, self.N_SAMPLES)
                total_outputs = torch.cat((total_outputs, samples), 0)
            varss = total_outputs.std(0)
        else:
@@ -92,11 +78,10 @@ class PredictMonoLoco:
                  .format(self.n_dropout, (end-start) * 1000))
            print("Single forward pass time = {:.2f} ms".format((end - start_single) * 1000))

        # Print image and save json
        # Create output files
        dic_out = defaultdict(list)
        if dic_names:
            name = os.path.basename(self.image_path)
            boxes_gt, dds_gt = dic_names[name]['boxes'], dic_names[name]['dds']
        if dic_gt:
            boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds']

        for idx, box in enumerate(uv_boxes):
            dd_pred = float(outputs[idx][0])
@@ -104,9 +89,9 @@ class PredictMonoLoco:
            var_y = float(varss[idx])

            # Find the corresponding ground truth if available
            if dic_names:
            if dic_gt:
                idx_max, iou_max = get_idx_max(box, boxes_gt)
                if iou_max > self.iou_min:
                if iou_max > self.IOU_MIN:
                    dd_real = dds_gt[idx_max]
                    boxes_gt.pop(idx_max)
                    dds_gt.pop(idx_max)
@@ -132,42 +117,4 @@ class PredictMonoLoco:
            dic_out['uv_centers'].append(uv_center)
            dic_out['uv_shoulders'].append(uv_shoulders[idx])

        if any((xx in self.output_types for xx in ['front', 'bird', 'combined'])):
            printer = Printer(self.image_path, self.output_path, dic_out, kk, output_types=self.output_types,
                              show=self.show, z_max=self.z_max, epistemic=self.epistemic)
            printer.print()

        if 'json' in self.output_types:
            with open(os.path.join(self.output_path + '.monoloco.json'), 'w') as ff:
                json.dump(dic_out, ff)

        sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')


def factory_for_gt(path_gt, image_path):
    """Look for ground-truth annotations file and define calibration matrix based on image size """

    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        print('-' * 120 + "\nMonoloco: Ground-truth file opened\n")
    except FileNotFoundError:
        print('-' * 120 + "\nMonoloco: ground-truth file not found\n")
        dic_names = {}

    try:
        name = os.path.basename(image_path)
        kk = dic_names[name]['K']
        print("Monoloco: matched ground-truth file!\n" + '-' * 120)
    except KeyError:
        dic_names = None
        with open(image_path, 'rb') as f:
            im = Image.open(f)
        if im.size[0] / im.size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
        else:
            kk = [[1266.4, 0., 816.27], [0, 1266.4, 491.5], [0., 0., 1.]]  # Nuscenes calibration
        print("Ground-truth annotations for the image not found\n"
              "Using a standard calibration matrix...\n" + '-' * 120)

    return dic_names, kk
        return dic_out
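A driver sketch for the refactored class (assumed wiring that mirrors predict.py below; the checkpoint path is a placeholder, and pifpaf_out, im_size and kk come from the earlier pifpaf and calibration steps): build MonoLoco once from the checkpoint, then call forward per image:

import torch
from predict.monoloco import MonoLoco
from utils.pifpaf import preprocess_pif

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
monoloco = MonoLoco(model='data/models/monoloco.pkl', device=device, n_dropout=0)  # placeholder path

boxes, keypoints = preprocess_pif(pifpaf_out, im_size)  # pifpaf annotations from a previous step
dic_out = monoloco.forward(boxes, keypoints, kk)        # kk from factory_for_gt
print(len(dic_out['uv_centers']), 'people localized')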
@@ -1,26 +1,26 @@
import glob
import json
import os
import sys

import numpy as np

from openpifpaf.network import nets
from openpifpaf import decoder, show
from openpifpaf import transforms
from predict.predict_monoloco import PredictMonoLoco
from utils.pifpaf import preprocess_pif

import torchvision
import torch

from PIL import Image, ImageFile

from openpifpaf.network import nets
from openpifpaf import decoder
from openpifpaf import transforms
from predict.monoloco import MonoLoco
from predict.factory import factory_for_gt, factory_outputs
from utils.pifpaf import preprocess_pif


class ImageList(torch.utils.data.Dataset):
    """It defines transformations to apply to images and outputs of the dataloader"""
    def __init__(self, image_paths, scale, image_transform=None):
        self.image_paths = image_paths
        self.image_transform = image_transform or transforms.image_transform
        self.image_transform = image_transform or transforms.image_transform  # to_tensor + normalize (from pifpaf)
        self.scale = scale

        # data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2
@@ -37,7 +37,8 @@ class ImageList(torch.utils.data.Dataset):
            (round(self.scale * image.size[1]),
             round(self.scale * image.size[0])),
            interpolation=Image.BICUBIC)
        original_image = torchvision.transforms.functional.to_tensor(image)
        # PIL images are not iterables
        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
        image = self.image_transform(image)

        return image_path, original_image, image
@@ -76,12 +77,16 @@ def factory_from_args(args):

def predict(args):

    cnt = 0
    factory_from_args(args)

    # load model
    model, _ = nets.factory_from_args(args)
    model = model.to(args.device)
    processor = decoder.factory_from_args(args, model)
    # load pifpaf model
    model_pifpaf, _ = nets.factory_from_args(args)
    model_pifpaf = model_pifpaf.to(args.device)
    processor = decoder.factory_from_args(args, model_pifpaf)

    # load monoloco
    monoloco = MonoLoco(model=args.model, device=args.device, n_dropout=args.n_dropout)

    # data
    data = ImageList(args.images, scale=args.scale)
@@ -89,11 +94,6 @@ def predict(args):
        data, batch_size=1, shuffle=False,
        pin_memory=args.pin_memory, num_workers=args.loader_workers)

    # Visualizer
    keypoint_painter = show.KeypointPainter(show_box=True)
    skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
                                            markersize=1, linewidth=4)

    keypoints_whole = []
    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
        images = image_tensors.permute(0, 2, 3, 1)
@@ -121,45 +121,42 @@ def predict(args):
        # Correct to not change the confidence
        scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)

        if keypoint_sets.size > 0:
            keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
                                   .reshape(keypoint_sets.shape[0], -1).tolist())

        pifpaf_out = [
            {'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
             'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
                      np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
            for kps in keypoint_sets
        ]

        # Save json file
        if 'pifpaf' in args.networks:

            if 'json' in args.output_types and keypoint_sets.size > 0:
                with open(output_path + '.pifpaf.json', 'w') as f:
                    json.dump(pifpaf_out, f)

        if keypoint_sets.size > 0:
            keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
                                   .reshape(keypoint_sets.shape[0], -1).tolist())

            if 'keypoints' in args.output_types:
                with show.image_canvas(image,
                                       output_path + '.keypoints.png',
                                       show=args.show,
                                       fig_width=args.figure_width,
                                       dpi_factor=args.dpi_factor) as ax:
                    keypoint_painter.keypoints(ax, keypoint_sets)

            if 'skeleton' in args.output_types:
                with show.image_canvas(image,
                                       output_path + '.skeleton.png',
                                       show=args.show,
                                       fig_width=args.figure_width,
                                       dpi_factor=args.dpi_factor) as ax:
                    skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
        pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
        images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image

        if 'monoloco' in args.networks:
            im_size = (float(image.size()[1] / args.scale),
                       float(image.size()[0] / args.scale))  # Width, Height (original)
            boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
            predict_monoloco = PredictMonoLoco(boxes, keypoints, image_path, output_path, args)
            predict_monoloco.run()

            # Extract calibration matrix and ground truth file if present

            with open(image_path, 'rb') as f:
                pil_image = Image.open(f).convert('RGB')
            images_outputs.append(pil_image)

            im_name = os.path.basename(image_path)

            kk, _ = factory_for_gt(image, name=im_name, path_gt=args.path_gt)

            # Preprocess pifpaf outputs and run monoloco
            boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
            monoloco_outputs = monoloco.forward(boxes, keypoints, kk)
        else:
            monoloco_outputs = None
            kk = None

        factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=monoloco_outputs, kk=kk)
        sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')
        cnt += 1
    return keypoints_whole
@@ -193,3 +193,15 @@ def get_depth(uv_center, kk, dd):

    xyz = pixel_to_camera(uv_center_np, kk, zz).tolist()
    return xyz


def get_depth_from_distance(outputs, xy_centers):

    list_zzs = []
    for idx, _ in enumerate(outputs):
        dd = float(outputs[idx][0])
        xx_1 = float(xy_centers[idx][0])
        yy_1 = float(xy_centers[idx][1])
        zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2)
        list_zzs.append(zz)
    return list_zzs
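A quick numerical check of the conversion (my numbers, not from the commit): the predicted distance is measured along the viewing ray, so for dd = 10 m through the normalized image point (0.3, 0.1) the depth component is 10 / sqrt(1 + 0.3**2 + 0.1**2) ≈ 9.53 m:

import math

dd = 10.0              # predicted distance along the ray [m]
xx_1, yy_1 = 0.3, 0.1  # normalized image coordinates of the center
zz = dd / math.sqrt(1 + xx_1 ** 2 + yy_1 ** 2)
print(round(zz, 2))    # 9.53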
@@ -60,14 +60,24 @@ def get_calibration(path_txt):
    p2_list = [float(xx) for xx in p2_str]
    p2 = np.array(p2_list).reshape(3, 4)

    kk = p2[:, :-1]
    p3_str = file[3].split()[1:]
    p3_list = [float(xx) for xx in p3_str]
    p3 = np.array(p3_list).reshape(3, 4)

    kk, tt = get_translation(p2)
    kk_right, tt_right = get_translation(p3)

    return [kk, tt], [kk_right, tt_right]


def get_translation(pp):
    """Separate intrinsic matrix from translation"""

    kk = pp[:, :-1]
    f_x = kk[0, 0]
    f_y = kk[1, 1]
    x0 = kk[2, 0]
    y0 = kk[2, 1]
    aa = p2[0, 3]
    bb = p2[1, 3]
    t3 = p2[2, 3]
    x0, y0 = kk[2, 0:2]
    aa, bb, t3 = pp[0:3, 3]
    t1 = (aa - x0*t3) / f_x
    t2 = (bb - y0*t3) / f_y
    tt = np.array([t1, t2, t3]).reshape(3, 1)
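For reference (standard projective geometry, not part of the diff): KITTI's P2/P3 entries are 3x4 matrices of the form P = K [I | t], so the last column equals K t and the translation follows as t3 = P[2, 3], t1 = (P[0, 3] - x0*t3) / f_x, t2 = (P[1, 3] - y0*t3) / f_y, with (x0, y0) the principal point, which is the formula used above. A small numpy check with made-up values:

import numpy as np

kk = np.array([[721.5, 0., 609.6],
               [0., 721.5, 172.9],
               [0., 0., 1.]])                  # illustrative intrinsics
tt = np.array([0.06, 0., 0.])                  # illustrative translation [m]
pp = np.hstack((kk, (kk @ tt).reshape(3, 1)))  # P = K [I | t]

t3 = pp[2, 3]
t1 = (pp[0, 3] - kk[0, 2] * t3) / kk[0, 0]
t2 = (pp[1, 3] - kk[1, 2] * t3) / kk[1, 1]
print(np.allclose([t1, t2, t3], tt))           # True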
@@ -102,6 +112,7 @@ def check_conditions(line, mode, thresh=0.5):
        check = True

    elif mode == 'gt':
        # if line[:10] == 'Pedestrian' or line[:10] == 'Person_sit':
        if line[:10] == 'Pedestrian':
            check = True


@@ -204,4 +204,3 @@ def append_cluster(dic_jo, phase, xx, dd, kps):
    dic_jo[phase]['clst']['>30']['X'].append(xx)
    dic_jo[phase]['clst']['>30']['Y'].append([dd])
src/utils/stereo.py (new file, 49 lines)
@@ -0,0 +1,49 @@
import copy
import numpy as np


def depth_from_disparity(zzs, zzs_right, kps, kps_right):
    """Associate instances in left and right images and compute disparity"""

    zzs_stereo = []
    cnt = 0
    for idx, zz in enumerate(zzs):

        # Find the closest human in terms of distance
        zz_stereo, idx_min, delta_d_min = calculate_disparity(zz, zzs_right, kps[idx], kps_right)
        if delta_d_min < 1:
            zzs_stereo.append(zz_stereo)
            zzs_right.pop(idx_min)
            kps_right.pop(idx_min)
            cnt += 1
        else:
            zzs_stereo.append(zz)

    return zzs_stereo, cnt


def calculate_disparity(zz, zzs_right, kp, kps_right):
    """From 2 sets of keypoints calculate disparity as the median of the disparities"""

    kp = np.array(copy.deepcopy(kp))
    kps_right = np.array(copy.deepcopy(kps_right))
    zz_stereo = 0
    idx_min = 0
    delta_z_min = 4

    for idx, zz_right in enumerate(zzs_right):
        delta_z = abs(zz - zz_right)
        diffs = np.array(np.array(kp[0] - kps_right[idx][0]))
        diff = np.mean(diffs)

        # Check only for right instances (5 pxls = 80 meters)
        if delta_z < delta_z_min and diff > 5:
            delta_z_min = delta_z
            idx_min = idx
            zzs = 0.54 * 721 / diffs
            zz_stereo = np.median(zzs[kp[2] > 0])

    return zz_stereo, idx_min, delta_z_min
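The hard-coded 0.54 * 721 above is the pinhole stereo relation z = f * B / d, with KITTI's baseline B ≈ 0.54 m and focal length f ≈ 721 px (both assumed constants in this sketch). A rough scale check, which also explains the 5-pixel gate in the comment above:

baseline = 0.54  # KITTI stereo baseline [m]
focal = 721.0    # approximate KITTI focal length [px]

for disparity in (5.0, 10.0, 50.0):
    depth = baseline * focal / disparity
    print('disparity %5.1f px -> depth %6.2f m' % (disparity, depth))
# 5.0 px -> ~77.87 m: disparities below ~5 px correspond to ranges beyond ~80 m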
@@ -9,17 +9,23 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.patches import Ellipse, Circle
import cv2
from collections import OrderedDict
from PIL import Image
from utils.camera import pixel_to_camera


class Printer:
    """
    Print results on images: birds eye view and computed distance
    """
    RADIUS_KPS = 6
    FONTSIZE_BV = 16
    FONTSIZE = 18
    TEXTCOLOR = 'darkorange'
    COLOR_KPS = 'yellow'

    def __init__(self, image_path, output_path, dic_ann, kk, output_types, show=False,
    def __init__(self, image, output_path, dic_ann, kk, output_types, show=False,
                 draw_kps=False, text=True, legend=True, epistemic=False, z_max=30, fig_width=10):

        self.im = image
        self.kk = kk
        self.output_types = output_types
        self.show = show
@@ -27,13 +33,9 @@ class Printer:
        self.text = text
        self.epistemic = epistemic
        self.legend = legend
        self.z_max = z_max  # To include ellipses in the image
        self.z_max = z_max  # To include ellipses in the image
        self.fig_width = fig_width

        from utils.camera import pixel_to_camera, get_depth
        self.pixel_to_camera = pixel_to_camera
        self.get_depth = get_depth

        # Define the output dir
        self.path_out = output_path

@@ -52,12 +54,10 @@ class Printer:
        self.uv_shoulders = dic_ann['uv_shoulders']
        self.uv_kps = dic_ann['uv_kps']

        # Load the image
        with open(image_path, 'rb') as f:
            self.im = Image.open(f).convert('RGB')

        self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1])
        self.ww = self.im.size[0]
        self.hh = self.im.size[1]
        self.radius = 14 / 1600 * self.ww

    def print(self):
        """
@@ -66,22 +66,15 @@ class Printer:
        Either front and/or bird visualization or combined one
        """
        # Parameters
        radius = 14
        radius_kps = 6
        fontsize_bv = 16
        fontsize = 18
        textcolor = 'darkorange'
        color_kps = 'yellow'

        # Resize image for aesthetic proportions in combined visualization
        if 'combined' in self.output_types:
            ww = self.im.size[0]
            hh = self.im.size[1]
            y_scale = ww / (hh * 1.8)  # Defined proportion
            self.im = self.im.resize((ww, round(hh * y_scale)))
            print(y_scale)
            width = self.fig_width + 0.6 * self.fig_width
            height = self.fig_width * self.im.size[1] / self.im.size[0]
            y_scale = self.ww / (self.hh * 1.8)  # Defined proportion
            self.im = self.im.resize((self.ww, round(self.hh * y_scale)))
            self.ww = self.im.size[0]
            self.hh = self.im.size[1]
            fig_width = self.fig_width + 0.6 * self.fig_width
            fig_height = self.fig_width * self.hh / self.ww

            # Distinguish between KITTI images and general images
            if y_scale > 1.7:
@@ -92,7 +85,7 @@ class Printer:
            ext = '.combined.png'

            fig, (ax1, ax0) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [1, width_ratio]},
                                           figsize=(width, height))
                                           figsize=(fig_width, fig_height))
            ax1.set_aspect(fig_ar_1)
            fig.set_tight_layout(True)
            fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -104,7 +97,7 @@ class Printer:
        elif 'front' in self.output_types:
            y_scale = 1
            width = self.fig_width
            height = self.fig_width * self.im.size[1] / self.im.size[0]
            height = self.fig_width * self.hh / self.ww

            plt.figure(0)
            fig0, ax0 = plt.subplots(1, 1, figsize=(width, height))
@@ -114,8 +107,8 @@ class Printer:
        if any(xx in self.output_types for xx in ['front', 'combined']):

            ax0.set_axis_off()
            ax0.set_xlim(0, self.im.size[0])
            ax0.set_ylim(self.im.size[1], 0)
            ax0.set_xlim(0, self.ww)
            ax0.set_ylim(self.hh, 0)
            ax0.imshow(self.im)
            z_min = 0
            bar_ticks = self.z_max // 5 + 1
@@ -125,16 +118,16 @@ class Printer:
        for idx, uv in enumerate(self.uv_shoulders):

            if self.draw_kps:
                ax0 = self.show_kps(ax0, self.uv_kps[idx], y_scale, radius_kps, color_kps)
                ax0 = self.show_kps(ax0, self.uv_kps[idx], y_scale, self.RADIUS_KPS, self.COLOR_KPS)

            elif min(self.zz_pred[idx], self.zz_gt[idx]) > 0:
                color = cmap((self.zz_pred[idx] % self.z_max) / self.z_max)
                circle = Circle((uv[0], uv[1] * y_scale), radius=radius, color=color, fill=True)
                circle = Circle((uv[0], uv[1] * y_scale), radius=self.radius, color=color, fill=True)
                ax0.add_patch(circle)

                if self.text:
                    ax0.text(uv[0]+radius, uv[1] * y_scale - radius, str(num),
                             fontsize=fontsize, color=textcolor, weight='bold')
                    ax0.text(uv[0]+self.radius, uv[1] * y_scale - self.radius, str(num),
                             fontsize=self.FONTSIZE, color=self.TEXTCOLOR, weight='bold')
                num += 1

        ax0.get_xaxis().set_visible(False)
@@ -166,7 +159,7 @@ class Printer:
        # Create bird or combine it with front)
        if any(xx in self.output_types for xx in ['bird', 'combined']):
            uv_max = np.array([0, self.hh, 1])
            xyz_max = self.pixel_to_camera(uv_max, self.kk, self.z_max)
            xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max)
            x_max = abs(xyz_max[0])  # shortcut to avoid oval circles in case of different kk

            for idx, _ in enumerate(self.xx_gt):
@@ -189,8 +182,8 @@ class Printer:
                                      height=1, angle=angle, color='b', fill=False, label="Aleatoric Uncertainty",
                                      linewidth=1.3)
                ellipse_var = Ellipse((self.xx_pred[idx], self.zz_pred[idx]), width=self.stds_ale_epi[idx] * 2,
                                      height=1, angle=angle, color='r', fill=False, label="Uncertainty", linewidth=1,
                                      linestyle='--')
                                      height=1, angle=angle, color='r', fill=False, label="Uncertainty",
                                      linewidth=1, linestyle='--')

                ax1.add_patch(ellipse_ale)
                if self.epistemic:
@@ -203,7 +196,7 @@ class Printer:
            (_, x_pos), (_, z_pos) = get_confidence(self.xx_pred[idx], self.zz_pred[idx], self.stds_ale_epi[idx])

            if self.text:
                ax1.text(x_pos, z_pos, str(num), fontsize=fontsize_bv, color='darkorange')
                ax1.text(x_pos, z_pos, str(num), fontsize=self.FONTSIZE_BV, color='darkorange')
            num += 1

        # To avoid repetitions in the legend
@@ -219,6 +212,10 @@ class Printer:
        ax1.set_xlabel("X [m]")
        ax1.set_ylabel("Z [m]")

        # To remove axis numbers
        # plt.setp([ax1.get_yticklabels() for aa in fig.axes[:-1]], visible=False)
        # plt.setp([ax1.get_xticklabels() for aa in fig.axes[:-1]], visible=False)

        if self.show:
            plt.show()
        else:
@@ -227,9 +224,7 @@ class Printer:
        if self.draw_kps:
            im = cv2.imread(self.path_out + ext)
            im = self.increase_brightness(im, value=30)
            hh = im.size[1]
            ww = im.size[0]
            im_new = im[0:hh, 0:round(ww/1.7)]
            im_new = im[0 : self.hh, 0:round(self.ww / 1.7)]
            cv2.imwrite(self.path_out, im_new)

        plt.close('all')
@@ -243,7 +238,8 @@ class Printer:

        return ax0

    def increase_brightness(self, img, value=30):
    @staticmethod
    def increase_brightness(img, value=30):
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
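For intuition on the combined-view proportions (my example, not from the commit): a 1242x375 KITTI frame gives y_scale = 1242 / (375 * 1.8) ≈ 1.84, above the 1.7 threshold used above to tell wide KITTI frames from more square general images:

for ww, hh in ((1242, 375), (1600, 900)):  # KITTI-sized vs nuScenes-sized frame
    y_scale = ww / (hh * 1.8)
    kind = 'KITTI-like' if y_scale > 1.7 else 'general'
    print('%dx%d -> y_scale %.2f (%s)' % (ww, hh, y_scale, kind))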