* Add precision metrics * add mode gt_all and change default threshold * add cyclists * add iou matrix' ' * add cyclists only in training phase * add dropout in model name * small typos * small typo * fix error on uv_boxes * change default mode from gt_ped to gt * 2 decimals * fix name bug * refactor prepare_pif_kps * corrected get_keypoints_batch * add pixel to camera for 3d vectors * preprocessing in torch * return original outputs * Skeleton for post_process * baseline version for post processing * add keypoints torch in post_processing * cleaning misc * add reorder_matches * update preprocess with get_iou_matches * fix indices * remove aa * temp * skeleton kitti_generate * skeleton kitti_generate (2) * refactor file * remove old get_input_data * refactor geometric eval * refactor geometric eval(2) * temp * refactor geometric * change saving order for txts * update pixel to camera * update depth * Fix pixel to camera * add xyz_from_distance * use new function * fix std_ale calculation in eval * remove debug points
165 lines
5.9 KiB
Python
165 lines
5.9 KiB
Python
|
|
import glob
|
|
import os
|
|
import sys
|
|
|
|
import numpy as np
|
|
import torchvision
|
|
import torch
|
|
from PIL import Image, ImageFile
|
|
|
|
from openpifpaf.network import nets
|
|
from openpifpaf import decoder
|
|
from openpifpaf import transforms
|
|
from predict.monoloco import MonoLoco
|
|
from predict.factory import factory_for_gt, factory_outputs
|
|
from utils.pifpaf import preprocess_pif
|
|
|
|
|
|
class ImageList(torch.utils.data.Dataset):
|
|
"""It defines transformations to apply to images and outputs of the dataloader"""
|
|
def __init__(self, image_paths, scale, image_transform=None):
|
|
self.image_paths = image_paths
|
|
self.image_transform = image_transform or transforms.image_transform # to_tensor + normalize (from pifpaf)
|
|
self.scale = scale
|
|
|
|
# data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2
|
|
# .0)
|
|
|
|
def __getitem__(self, index):
|
|
image_path = self.image_paths[index]
|
|
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
|
with open(image_path, 'rb') as f:
|
|
image = Image.open(f).convert('RGB')
|
|
|
|
if self.scale > 1.01 or self.scale < 0.99:
|
|
image = torchvision.transforms.functional.resize(image,
|
|
(round(self.scale * image.size[1]),
|
|
round(self.scale * image.size[0])),
|
|
interpolation=Image.BICUBIC)
|
|
# PIL images are not iterables
|
|
original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1
|
|
image = self.image_transform(image)
|
|
|
|
return image_path, original_image, image
|
|
|
|
def __len__(self):
|
|
return len(self.image_paths)
|
|
|
|
|
|
def factory_from_args(args):
|
|
|
|
# Merge the model_pifpaf argument
|
|
if not args.checkpoint:
|
|
args.checkpoint = args.model_pifpaf
|
|
# glob
|
|
if args.glob:
|
|
args.images += glob.glob(args.glob)
|
|
if not args.images:
|
|
raise Exception("no image files given")
|
|
|
|
# add args.device
|
|
args.device = torch.device('cpu')
|
|
args.pin_memory = False
|
|
if torch.cuda.is_available():
|
|
args.device = torch.device('cuda')
|
|
args.pin_memory = True
|
|
|
|
# Add num_workers
|
|
args.loader_workers = 8
|
|
|
|
# Add visualization defaults
|
|
args.figure_width = 10
|
|
args.dpi_factor = 1.0
|
|
|
|
return args
|
|
|
|
|
|
def predict(args):
|
|
|
|
cnt = 0
|
|
factory_from_args(args)
|
|
|
|
# load pifpaf model
|
|
model_pifpaf, _ = nets.factory_from_args(args)
|
|
model_pifpaf = model_pifpaf.to(args.device)
|
|
processor = decoder.factory_from_args(args, model_pifpaf)
|
|
|
|
# load monoloco
|
|
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
|
|
|
# data
|
|
data = ImageList(args.images, scale=args.scale)
|
|
data_loader = torch.utils.data.DataLoader(
|
|
data, batch_size=1, shuffle=False,
|
|
pin_memory=args.pin_memory, num_workers=args.loader_workers)
|
|
|
|
keypoints_whole = []
|
|
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
|
|
images = image_tensors.permute(0, 2, 3, 1)
|
|
|
|
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
|
|
fields_batch = processor.fields(processed_images)
|
|
|
|
# unbatch
|
|
for image_path, image, processed_image_cpu, fields in zip(
|
|
image_paths,
|
|
images,
|
|
processed_images_cpu,
|
|
fields_batch):
|
|
|
|
if args.output_directory is None:
|
|
output_path = image_path
|
|
else:
|
|
file_name = os.path.basename(image_path)
|
|
output_path = os.path.join(args.output_directory, file_name)
|
|
print('image', idx, image_path, output_path)
|
|
|
|
processor.set_cpu_image(image, processed_image_cpu)
|
|
keypoint_sets, scores = processor.keypoint_sets(fields)
|
|
|
|
# Correct to not change the confidence
|
|
scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
|
|
|
|
if keypoint_sets.size > 0:
|
|
keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
|
|
.reshape(keypoint_sets.shape[0], -1).tolist())
|
|
|
|
pifpaf_out = [
|
|
{'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
|
|
'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
|
|
np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
|
|
for kps in keypoint_sets
|
|
]
|
|
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
|
|
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
|
|
|
|
if 'monoloco' in args.networks:
|
|
im_size = (float(image.size()[1] / args.scale),
|
|
float(image.size()[0] / args.scale)) # Width, Height (original)
|
|
|
|
# Extract calibration matrix and ground truth file if present
|
|
|
|
with open(image_path, 'rb') as f:
|
|
pil_image = Image.open(f).convert('RGB')
|
|
images_outputs.append(pil_image)
|
|
|
|
im_name = os.path.basename(image_path)
|
|
|
|
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
|
|
|
|
# Preprocess pifpaf outputs and run monoloco
|
|
boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
|
|
outputs, varss = monoloco.forward(keypoints, kk)
|
|
monoloco_outputs = [outputs, varss, boxes, keypoints, kk, dic_gt]
|
|
|
|
else:
|
|
monoloco_outputs = None
|
|
kk = None
|
|
|
|
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, monoloco_outputs=monoloco_outputs, kk=kk)
|
|
sys.stdout.write('\r' + 'Saving image {}'.format(cnt) + '\t')
|
|
cnt += 1
|
|
return keypoints_whole
|
|
|