refactor (#8)

* Make import from __init__ files

* add in init only classes or utils functions

* refactor packages

* fix pylint cyclic import

* add task error with 63% confidence intervals and mad

* fix pixel_error

* update setup

* update installation instructions

* update instructions

* update instructions

* update package installation
Lorenzo Bertoni, 2019-07-23 15:55:46 +02:00, committed by GitHub
parent 235a90ef47, commit 8366a436ee
36 changed files with 401 additions and 398 deletions

.travis.yml
@@ -4,10 +4,8 @@ python:
 - "3.6"
 - "3.7"
 install:
-- pip install openpifpaf
-- pip install nuscenes-devkit
-- pip install tabulate
-- pip install pylint
+- pip install --upgrade pip setuptools
+- pip install ".[test]"
 script:
 - pylint monoloco --disable=unused-variable,fixme
 - pytest -vv

README.md
@@ -29,14 +29,14 @@ A video with qualitative results is available on [YouTube](https://www.youtube.c
 Python 3 is required. Python 2 is not supported.
 Do not clone this repository and make sure there is no folder named monoloco in your current directory.
-`pip install monoloco`
+`pip3 install monoloco`
 Live demo is available, we recommend to try our **Webcam** functionality. More info in the webcam section.
 For development of the monoloco source code itself, you need to clone this repository and then:
 ```
-pip install openpifpaf nuscenes-devkit tabulate
+pip3 install -e '.[test, prep]'
 ```
 Python 3.6 or 3.7 is required for nuScenes development kit. Python 3 is required for openpifpaf.
 All details for Pifpaf pose detector at [openpifpaf](https://github.com/vita-epfl/openpifpaf).
@@ -135,6 +135,7 @@ Multiple visualizations can be combined in different windows.
 The above gif has been obtained running on a Macbook the command:
+`pip3 install opencv-python`
 `python3 -m monoloco.run predict --webcam --scale 0.2 --output_types combined --z_max 10 --checkpoint resnet50`
 # Preprocess
@@ -152,6 +153,8 @@ data/kitti/images`
 Download nuScenes dataset from [nuScenes](https://www.nuscenes.org/download) (either Mini or TrainVal),
 save it anywhere and soft link it in `data/nuscenes`
+nuScenes preprocessing requires `pip3 install nuscenes-devkit`
 ### Annotations to preprocess
 MonoLoco is trained using 2D human pose joints. To create them, run pifpaf over KITTI or nuScenes training images.

monoloco/__init__.py
@@ -1,4 +1,4 @@
 """Open implementation of MonoLoco."""
-__version__ = '0.4.1'
+__version__ = '0.4.2'

monoloco/eval/__init__.py (new file)
@@ -0,0 +1,4 @@
from .eval_kitti import EvalKitti
from .generate_kitti import GenerateKitti
from .geom_baseline import geometric_baseline
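
These package-level re-exports are what the rest of this diff relies on when it collapses deep module paths into single imports. A minimal before/after sketch of the import style (illustrative only):

```python
# Before the refactor: callers had to know the internal module layout.
# from monoloco.eval.eval_kitti import EvalKitti

# After: the package __init__ re-exports its public names.
from monoloco.eval import EvalKitti, GenerateKitti, geometric_baseline
```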

monoloco/eval/eval_kitti.py
@@ -13,10 +13,9 @@ from itertools import chain
 from tabulate import tabulate
-from ..utils.iou import get_iou_matches
-from ..utils.misc import get_task_error, get_pixel_error
-from ..utils.kitti import check_conditions, get_category, split_training, parse_ground_truth
-from ..visuals.results import print_results
+from ..utils import get_iou_matches, get_task_error, get_pixel_error, check_conditions, get_category, split_training, \
+    parse_ground_truth
+from ..visuals import print_results
 class EvalKitti:

monoloco/eval/generate_kitti.py
@@ -13,12 +13,10 @@ import copy
 import numpy as np
 import torch
-from ..predict.network import MonoLoco
+from ..network import MonoLoco
+from ..network.process import preprocess_pifpaf
 from ..eval.geom_baseline import compute_distance
-from ..utils.kitti import get_calibration
-from ..utils.pifpaf import preprocess_pif
-from ..utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera
-from ..utils.stereo import depth_from_disparity
+from ..utils import get_keypoints, pixel_to_camera, xyz_from_distance, get_calibration, depth_from_disparity
 class GenerateKitti:
@@ -51,7 +49,7 @@ class GenerateKitti:
         for basename in self.list_basename:
             path_calib = os.path.join(self.dir_kk, basename + '.txt')
             annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
-            boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374))
+            boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374))
             if not keypoints:
                 cnt_no_file += 1
@@ -95,7 +93,7 @@ class GenerateKitti:
             for mode in ['left', 'right']:
                 annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename, mode=mode)
-                boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374))
+                boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374))
                 if not keypoints and mode == 'left':
                     cnt_no_file += 1

monoloco/eval/geom_baseline.py
@@ -6,7 +6,7 @@ from collections import defaultdict
 import numpy as np
-from ..utils.camera import pixel_to_camera, get_keypoints
+from ..utils import pixel_to_camera, get_keypoints
 AVERAGE_Y = 0.48
 CLUSTERS = ['10', '20', '30', 'all']

monoloco/network/__init__.py (new file)
@@ -0,0 +1,4 @@
from .pifpaf import PifPaf, ImageList
from .losses import LaplacianLoss
from .net import MonoLoco

@@ -1,3 +1,4 @@
 import math
 import torch
 import numpy as np

monoloco/network/net.py
@@ -1,6 +1,6 @@
 """
-Monoloco predictor. It receives pifpaf joints and outputs distances
+Monoloco class. From 2D joints to real-world distances
 """
 import logging
@@ -8,10 +8,9 @@ from collections import defaultdict
 import torch
-from ..utils.iou import get_iou_matches, reorder_matches
-from ..utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance
-from ..utils.network import get_monoloco_inputs, unnormalize_bi, laplace_sampling
-from ..train.architectures import LinearModel
+from ..utils import get_iou_matches, reorder_matches, get_keypoints, pixel_to_camera, xyz_from_distance
+from .process import preprocess_monoloco, unnormalize_bi, laplace_sampling
+from .architectures import LinearModel
 class MonoLoco:
@@ -43,7 +42,7 @@ class MonoLoco:
             return None, None
         with torch.no_grad():
-            inputs = get_monoloco_inputs(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device))
+            inputs = preprocess_monoloco(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device))
             if self.n_dropout > 0:
                 self.model.dropout.training = True  # Manually reactivate dropout in eval
                 total_outputs = torch.empty((0, inputs.size()[0])).to(self.device)

monoloco/network/process.py (new file, 154 lines)
@@ -0,0 +1,154 @@
import json

import numpy as np
import torch

from ..utils import get_keypoints, pixel_to_camera


def preprocess_monoloco(keypoints, kk):
    """ Preprocess batches of inputs
    keypoints = torch tensors of (m, 3, 17) or list [3,17]
    Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box
    """
    if isinstance(keypoints, list):
        keypoints = torch.tensor(keypoints)
    if isinstance(kk, list):
        kk = torch.tensor(kk)

    # Projection in normalized image coordinates and zero-center with the center of the bounding box
    uv_center = get_keypoints(keypoints, mode='center')
    xy1_center = pixel_to_camera(uv_center, kk, 10)
    xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
    # xy1_center[:, 1].fill_(0)  # TODO
    kps_norm = xy1_all - xy1_center.unsqueeze(1)  # (m, 17, 3) - (m, 1, 3)
    kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1)  # no contiguous for view
    return kps_out


def factory_for_gt(im_size, name=None, path_gt=None):
    """Look for ground-truth annotations file and define calibration matrix based on image size """
    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        print('-' * 120 + "\nGround-truth file opened")
    except (FileNotFoundError, TypeError):
        print('-' * 120 + "\nGround-truth file not found")
        dic_names = {}

    try:
        kk = dic_names[name]['K']
        dic_gt = dic_names[name]
        print("Matched ground-truth file!")
    except KeyError:
        dic_gt = None
        x_factor = im_size[0] / 1600
        y_factor = im_size[1] / 900
        pixel_factor = (x_factor + y_factor) / 2  # TODO remove and check it
        if im_size[0] / im_size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
        else:
            kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
                  [0, 1266.4 * pixel_factor, 491.5 * y_factor],
                  [0., 0., 1.]]  # nuScenes calibration
        print("Using a standard calibration matrix...")

    return kk, dic_gt


def laplace_sampling(outputs, n_samples):

    # np.random.seed(1)
    mu = outputs[:, 0]
    bi = torch.abs(outputs[:, 1])

    # Analytical
    # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
    # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))

    # Sampling
    cuda_check = outputs.is_cuda
    if cuda_check:
        get_device = outputs.get_device()
        device = torch.device(type="cuda", index=get_device)
    else:
        device = torch.device("cpu")

    laplace = torch.distributions.Laplace(mu, bi)
    xx = laplace.sample((n_samples,)).to(device)

    return xx


def epistemic_variance(total_outputs):
    """Compute epistemic variance"""
    # var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2
    var_y = np.var(total_outputs, axis=0)
    lower_b = np.quantile(a=total_outputs, q=0.25, axis=0)
    upper_b = np.quantile(a=total_outputs, q=0.75, axis=0)
    var_new = (upper_b - lower_b)
    return var_y, var_new


def unnormalize_bi(outputs):
    """Unnormalize relative bi of a numpy array"""
    outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
    return outputs


def preprocess_pifpaf(annotations, im_size=None):
    """
    Preprocess pif annotations:
    1. enlarge the box of 10%
    2. Constraint it inside the image (if image_size provided)
    """
    boxes = []
    keypoints = []
    for dic in annotations:
        box = dic['bbox']
        if box[3] < 0.5:  # Check for no detections (boxes 0,0,0,0)
            return [], []
        kps = prepare_pif_kps(dic['keypoints'])
        conf = float(np.sort(np.array(kps[2]))[-3])  # The confidence is the 3rd highest value for the keypoints

        # Add 15% for y and 20% for x
        delta_h = (box[3] - box[1]) / 7
        delta_w = (box[2] - box[0]) / 3.5
        assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
        box[0] -= delta_w
        box[1] -= delta_h
        box[2] += delta_w
        box[3] += delta_h

        # Put the box inside the image
        if im_size is not None:
            box[0] = max(0, box[0])
            box[1] = max(0, box[1])
            box[2] = min(box[2], im_size[0])
            box[3] = min(box[3], im_size[1])

        box.append(conf)
        boxes.append(box)
        keypoints.append(kps)
    return boxes, keypoints


def prepare_pif_kps(kps_in):
    """Convert from a list of 51 to a list of 3, 17"""
    assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
    xxs = kps_in[0:][::3]
    yys = kps_in[1:][::3]  # from offset 1 every 3
    ccs = kps_in[2:][::3]
    return [xxs, yys, ccs]
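
Since process.py is a new file, a short usage sketch may help. The annotation dict below is hypothetical, but its shape follows what preprocess_pifpaf reads (a `bbox` plus a flat 51-value `keypoints` list):

```python
from monoloco.network.process import preprocess_pifpaf

# Hypothetical pifpaf-style annotation: bbox and 17 keypoints flattened as (x, y, c) triples
annotation = {'bbox': [50.0, 100.0, 120.0, 300.0],
              'keypoints': [60.0, 110.0, 0.9] * 17}

boxes, keypoints = preprocess_pifpaf([annotation], im_size=(1242, 374))
# boxes[0]: the enlarged box, clipped to the image, with the confidence
# (the 3rd highest keypoint score) appended as a 5th element
# keypoints[0]: the [xxs, yys, ccs] structure returned by prepare_pif_kps
```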

monoloco/predict.py (new file, 123 lines)
@@ -0,0 +1,123 @@
import os
import json

import torch
from PIL import Image
from openpifpaf import show

from .visuals.printer import Printer
from .network import PifPaf, ImageList, MonoLoco
from .network.process import factory_for_gt, preprocess_pifpaf


def predict(args):

    cnt = 0

    # load pifpaf and monoloco models
    pifpaf = PifPaf(args)
    monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)

    # data
    data = ImageList(args.images, scale=args.scale)
    data_loader = torch.utils.data.DataLoader(
        data, batch_size=1, shuffle=False,
        pin_memory=args.pin_memory, num_workers=args.loader_workers)

    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
        images = image_tensors.permute(0, 2, 3, 1)

        processed_images = processed_images_cpu.to(args.device, non_blocking=True)
        fields_batch = pifpaf.fields(processed_images)

        # unbatch
        for image_path, image, processed_image_cpu, fields in zip(
                image_paths, images, processed_images_cpu, fields_batch):

            if args.output_directory is None:
                output_path = image_path
            else:
                file_name = os.path.basename(image_path)
                output_path = os.path.join(args.output_directory, file_name)
            print('image', idx, image_path, output_path)

            keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
            pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
            images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image

            if 'monoloco' in args.networks:
                im_size = (float(image.size()[1] / args.scale),
                           float(image.size()[0] / args.scale))  # Width, Height (original)

                # Extract calibration matrix and ground truth file if present
                with open(image_path, 'rb') as f:
                    pil_image = Image.open(f).convert('RGB')
                    images_outputs.append(pil_image)

                im_name = os.path.basename(image_path)
                kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)

                # Preprocess pifpaf outputs and run monoloco
                boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size)
                outputs, varss = monoloco.forward(keypoints, kk)
                dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt)

            else:
                dic_out = None
                kk = None

            factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
            print('Image {}\n'.format(cnt) + '-' * 120)
            cnt += 1


def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
    """Output json files or images according to the choice"""

    # Save json file
    if 'pifpaf' in args.networks:
        keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]

        # Visualizer
        keypoint_painter = show.KeypointPainter(show_box=False)
        skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
                                                markersize=1, linewidth=4)

        if 'json' in args.output_types and keypoint_sets.size > 0:
            with open(output_path + '.pifpaf.json', 'w') as f:
                json.dump(pifpaf_out, f)

        if 'keypoints' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.keypoints.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                keypoint_painter.keypoints(ax, keypoint_sets)

        if 'skeleton' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.skeleton.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)

    if 'monoloco' in args.networks:
        if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
            epistemic = False
            if args.n_dropout > 0:
                epistemic = True

            if dic_out['boxes']:  # Only print in case of detections
                printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types,
                                  z_max=args.z_max, epistemic=epistemic)
                figures, axes = printer.factory_axes()
                printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box,
                             save=True, show=args.show)

        if 'json' in args.output_types:
            with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
                json.dump(dic_out, ff)

monoloco/predict/factory.py (deleted file)
@@ -1,87 +0,0 @@
import json
import os

from openpifpaf import show

from ..visuals.printer import Printer


def factory_for_gt(im_size, name=None, path_gt=None):
    """Look for ground-truth annotations file and define calibration matrix based on image size """
    try:
        with open(path_gt, 'r') as f:
            dic_names = json.load(f)
        print('-' * 120 + "\nGround-truth file opened")
    except (FileNotFoundError, TypeError):
        print('-' * 120 + "\nGround-truth file not found")
        dic_names = {}

    try:
        kk = dic_names[name]['K']
        dic_gt = dic_names[name]
        print("Matched ground-truth file!")
    except KeyError:
        dic_gt = None
        x_factor = im_size[0] / 1600
        y_factor = im_size[1] / 900
        pixel_factor = (x_factor + y_factor) / 2  # TODO remove and check it
        if im_size[0] / im_size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
        else:
            kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
                  [0, 1266.4 * pixel_factor, 491.5 * y_factor],
                  [0., 0., 1.]]  # nuScenes calibration
        print("Using a standard calibration matrix...")

    return kk, dic_gt


def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
    """Output json files or images according to the choice"""

    # Save json file
    if 'pifpaf' in args.networks:
        keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]

        # Visualizer
        keypoint_painter = show.KeypointPainter(show_box=False)
        skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
                                                markersize=1, linewidth=4)

        if 'json' in args.output_types and keypoint_sets.size > 0:
            with open(output_path + '.pifpaf.json', 'w') as f:
                json.dump(pifpaf_out, f)

        if 'keypoints' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.keypoints.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                keypoint_painter.keypoints(ax, keypoint_sets)

        if 'skeleton' in args.output_types:
            with show.image_canvas(images_outputs[0],
                                   output_path + '.skeleton.png',
                                   show=args.show,
                                   fig_width=args.figure_width,
                                   dpi_factor=args.dpi_factor) as ax:
                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)

    if 'monoloco' in args.networks:
        if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
            epistemic = False
            if args.n_dropout > 0:
                epistemic = True

            if dic_out['boxes']:  # Only print in case of detections
                printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types,
                                  z_max=args.z_max, epistemic=epistemic)
                figures, axes = printer.factory_axes()
                printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box,
                             save=True, show=args.show)

        if 'json' in args.output_types:
            with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
                json.dump(dic_out, ff)

monoloco/predict/predict.py (deleted file)
@@ -1,72 +0,0 @@
import os

from PIL import Image
import torch

from ..predict.pifpaf import PifPaf, ImageList
from ..predict.network import MonoLoco
from ..predict.factory import factory_for_gt, factory_outputs
from ..utils.pifpaf import preprocess_pif


def predict(args):

    cnt = 0

    # load pifpaf and monoloco models
    pifpaf = PifPaf(args)
    monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)

    # data
    data = ImageList(args.images, scale=args.scale)
    data_loader = torch.utils.data.DataLoader(
        data, batch_size=1, shuffle=False,
        pin_memory=args.pin_memory, num_workers=args.loader_workers)

    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
        images = image_tensors.permute(0, 2, 3, 1)

        processed_images = processed_images_cpu.to(args.device, non_blocking=True)
        fields_batch = pifpaf.fields(processed_images)

        # unbatch
        for image_path, image, processed_image_cpu, fields in zip(
                image_paths, images, processed_images_cpu, fields_batch):

            if args.output_directory is None:
                output_path = image_path
            else:
                file_name = os.path.basename(image_path)
                output_path = os.path.join(args.output_directory, file_name)
            print('image', idx, image_path, output_path)

            keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
            pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
            images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image

            if 'monoloco' in args.networks:
                im_size = (float(image.size()[1] / args.scale),
                           float(image.size()[0] / args.scale))  # Width, Height (original)

                # Extract calibration matrix and ground truth file if present
                with open(image_path, 'rb') as f:
                    pil_image = Image.open(f).convert('RGB')
                    images_outputs.append(pil_image)

                im_name = os.path.basename(image_path)
                kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)

                # Preprocess pifpaf outputs and run monoloco
                boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
                outputs, varss = monoloco.forward(keypoints, kk)
                dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt)

            else:
                dic_out = None
                kk = None

            factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
            print('Image {}\n'.format(cnt) + '-' * 120)
            cnt += 1

monoloco/prep/__init__.py (new file)
@@ -0,0 +1,3 @@
from .preprocess_nu import PreprocessNuscenes
from .preprocess_ki import PreprocessKitti

monoloco/prep/preprocess_ki.py
@@ -8,12 +8,9 @@ from collections import defaultdict
 import json
 import datetime
-from ..prep.transforms import transform_keypoints
-from ..utils.kitti import get_calibration, split_training, parse_ground_truth
-from ..utils.network import get_monoloco_inputs
-from ..utils.pifpaf import preprocess_pif
-from ..utils.iou import get_iou_matches
-from ..utils.misc import append_cluster
+from .transforms import transform_keypoints
+from ..utils import get_calibration, split_training, parse_ground_truth, get_iou_matches, append_cluster
+from ..network.process import preprocess_pifpaf, preprocess_monoloco
 class PreprocessKitti:
@@ -84,10 +81,10 @@ class PreprocessKitti:
         try:
             with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f:
                 annotations = json.load(f)
-            boxes, keypoints = preprocess_pif(annotations, im_size=(1238, 374))
+            boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1238, 374))
             keypoints_hflip = transform_keypoints(keypoints, mode='flip')
-            inputs = get_monoloco_inputs(keypoints, kk).tolist()
-            inputs_hflip = get_monoloco_inputs(keypoints, kk).tolist()
+            inputs = preprocess_monoloco(keypoints, kk).tolist()
+            inputs_hflip = preprocess_monoloco(keypoints, kk).tolist()
             all_keypoints = [keypoints, keypoints_hflip]
             all_inputs = [inputs, inputs_hflip]

monoloco/prep/preprocess_nu.py
@@ -10,16 +10,11 @@ from collections import defaultdict
 import datetime
 import numpy as np
 from nuscenes.nuscenes import NuScenes
 from nuscenes.utils import splits
-from ..utils.iou import get_iou_matches
-from ..utils.misc import append_cluster
-from ..utils.nuscenes import select_categories
-from ..utils.camera import project_3d
-from ..utils.pifpaf import preprocess_pif
-from ..utils.network import get_monoloco_inputs
+from ..utils import get_iou_matches, append_cluster, select_categories, project_3d
+from ..network.process import preprocess_pifpaf, preprocess_monoloco
 class PreprocessNuscenes:
@@ -97,12 +92,12 @@ class PreprocessNuscenes:
         if exists:
             with open(path_pif, 'r') as file:
                 annotations = json.load(file)
-            boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900))
+            boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
         else:
             continue
         if keypoints:
-            inputs = get_monoloco_inputs(keypoints, kk).tolist()
+            inputs = preprocess_monoloco(keypoints, kk).tolist()
             matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
             for (idx, idx_gt) in matches:

monoloco/run.py
@@ -1,20 +1,11 @@
 # pylint: skip-file
 import argparse
 from openpifpaf.network import nets
 from openpifpaf import decoder
-from .prep.preprocess_nu import PreprocessNuscenes
-from .prep.preprocess_ki import PreprocessKitti
-from .predict.predict import predict
-from .train.trainer import Trainer
-from .eval.generate_kitti import GenerateKitti
-from .eval.geom_baseline import geometric_baseline
-from .train.hyp_tuning import HypTuning
-from .eval.eval_kitti import EvalKitti
-from .visuals.webcam import webcam
 def cli():
@@ -105,28 +96,33 @@ def cli():
 def main():
     args = cli()
     if args.command == 'predict':
         if args.webcam:
+            from .visuals.webcam import webcam
             webcam(args)
         else:
+            from .predict import predict
             predict(args)
     elif args.command == 'prep':
         if 'nuscenes' in args.dataset:
+            from .prep import PreprocessNuscenes
             prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset, args.iou_min)
             prep.run()
         if 'kitti' in args.dataset:
+            from .prep import PreprocessKitti
             prep = PreprocessKitti(args.dir_ann, args.iou_min)
             prep.run()
     elif args.command == 'train':
+        from .train import HypTuning
         if args.hyp:
             hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs,
                                    baseline=args.baseline, dropout=args.dropout,
                                    multiplier=args.multiplier, r_seed=args.r_seed)
             hyp_tuning.train()
         else:
+            from .train import Trainer
             training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs,
                                baseline=args.baseline, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step,
                                n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size,
@@ -137,20 +133,24 @@ def main():
     elif args.command == 'eval':
         if args.geometric:
+            from .eval import geometric_baseline
             geometric_baseline(args.joints)
         if args.generate:
+            from .eval import GenerateKitti
             kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout)
             kitti_txt.run_mono()
             if args.stereo:
                 kitti_txt.run_stereo()
         if args.dataset == 'kitti':
+            from .eval import EvalKitti
             kitti_eval = EvalKitti(verbose=args.verbose, stereo=args.stereo)
             kitti_eval.run()
             kitti_eval.printer(show=args.show)
         if 'nuscenes' in args.dataset:
+            from .train import Trainer
             training = Trainer(joints=args.joints)
             _ = training.evaluate(load=True, model=args.model, debug=False)
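
The pattern in these hunks — moving each subcommand's imports from module level into the main() branches — is the usual fix for the cyclic import pylint flagged, and it also keeps heavy optional dependencies (cv2 for the webcam, nuscenes-devkit for prep) unloaded unless their subcommand actually runs. A minimal sketch of the idea:

```python
def main():
    args = cli()
    if args.command == 'predict':
        # Imported here rather than at module level: the import only runs
        # for this subcommand, which also breaks the package-level cycle.
        from .predict import predict
        predict(args)
```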

monoloco/train/__init__.py (new file)
@@ -0,0 +1,3 @@
from .hyp_tuning import HypTuning
from .trainer import Trainer

monoloco/train/trainer.py
@@ -1,4 +1,4 @@
-# pylint: skip-file # TODO
+# pylint: skip-file # TODO make train file and class trainer and
 """
 Training and evaluation of a neural network which predicts 3D localization and confidence intervals
@@ -19,11 +19,11 @@ import torch.nn as nn
 from torch.utils.data import DataLoader
 from torch.optim import lr_scheduler
-from .datasets import KeypointsDataset
-from .architectures import LinearModel
-from .losses import LaplacianLoss
-from ..utils.logs import set_logger
-from ..utils.network import laplace_sampling, unnormalize_bi
+from train.datasets import KeypointsDataset
+from ..network import LaplacianLoss
+from ..network.process import laplace_sampling, unnormalize_bi
+from ..network.architectures import LinearModel
+from ..utils import set_logger
 class Trainer:

monoloco/utils/__init__.py (new file)
@@ -0,0 +1,8 @@
from .iou import get_iou_matches, reorder_matches, get_iou_matrix
from .misc import get_task_error, get_pixel_error, append_cluster
from .kitti import check_conditions, get_category, split_training, parse_ground_truth, get_calibration
from .camera import xyz_from_distance, get_keypoints, pixel_to_camera, project_3d
from .logs import set_logger
from .stereo import depth_from_disparity
from ..utils.nuscenes import select_categories

monoloco/utils/misc.py
@@ -1,6 +1,4 @@
-import random
 def append_cluster(dic_jo, phase, xx, dd, kps):
     """Append the annotation based on its distance"""
@@ -27,20 +25,19 @@ def append_cluster(dic_jo, phase, xx, dd, kps):
 def get_task_error(dd, mode='std'):
-    """Get target error not knowing the gender"""
+    """Get target error not knowing the gender, modeled through a Gaussian Mixture model"""
     assert mode in ('std', 'mad')
+    h_mean = 171.5  # average h of the human distribution
     if mode == 'std':
-        mm_gender = 0.0557
+        delta_h = 9.07  # delta h for the 63% confidence interval
-    elif mode == 'mad':  # mean absolute deviation
+    elif mode == 'mad':
-        mm_gender = 0.0457
+        delta_h = 7.83  # delta_h of mean absolute deviation
-    return mm_gender * dd
+    return dd * (1 - h_mean / (h_mean + delta_h))
 def get_pixel_error(dd_gt, zz_gt):
-    """calculate error in stereo distance due to +-1 pixel mismatch (function of depth)"""
+    """calculate error in stereo distance due to 1 pixel mismatch (function of depth)"""
     disp = 0.54 * 721 / zz_gt
-    random.seed(1)
-    sign = random.choice((-1, 1))
-    delta_z = zz_gt - 0.54 * 721 / (disp + sign)
+    delta_z = zz_gt - 0.54 * 721 / (disp - 1)
     return dd_gt + delta_z
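
A quick sanity check of the new closed-form task error, using the constants from the hunk above (h_mean = 171.5 cm, delta_h = 9.07 cm for the 63% interval, 7.83 cm for the mean absolute deviation):

```python
dd = 10.0  # ground-truth distance in meters

err_std = dd * (1 - 171.5 / (171.5 + 9.07))  # ~0.50 m, vs 0.56 m with the old 0.0557 * dd
err_mad = dd * (1 - 171.5 / (171.5 + 7.83))  # ~0.44 m, vs 0.46 m with the old 0.0457 * dd
```

Note also that get_pixel_error is now deterministic: it always evaluates the -1 pixel disparity shift instead of a randomly chosen sign.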

monoloco/utils/network.py (deleted file)
@@ -1,67 +0,0 @@
import numpy as np
import torch

from ..utils.camera import get_keypoints, pixel_to_camera


def get_monoloco_inputs(keypoints, kk):
    """ Preprocess batches of inputs
    keypoints = torch tensors of (m, 3, 17) or list [3,17]
    Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box
    """
    if isinstance(keypoints, list):
        keypoints = torch.tensor(keypoints)
    if isinstance(kk, list):
        kk = torch.tensor(kk)

    # Projection in normalized image coordinates and zero-center with the center of the bounding box
    uv_center = get_keypoints(keypoints, mode='center')
    xy1_center = pixel_to_camera(uv_center, kk, 10)
    xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
    # xy1_center[:, 1].fill_(0)  # TODO
    kps_norm = xy1_all - xy1_center.unsqueeze(1)  # (m, 17, 3) - (m, 1, 3)
    kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1)  # no contiguous for view
    return kps_out


def laplace_sampling(outputs, n_samples):

    # np.random.seed(1)
    mu = outputs[:, 0]
    bi = torch.abs(outputs[:, 1])

    # Analytical
    # uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
    # xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))

    # Sampling
    cuda_check = outputs.is_cuda
    if cuda_check:
        get_device = outputs.get_device()
        device = torch.device(type="cuda", index=get_device)
    else:
        device = torch.device("cpu")

    laplace = torch.distributions.Laplace(mu, bi)
    xx = laplace.sample((n_samples,)).to(device)

    return xx


def epistemic_variance(total_outputs):
    """Compute epistemic variance"""
    # var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2
    var_y = np.var(total_outputs, axis=0)
    lower_b = np.quantile(a=total_outputs, q=0.25, axis=0)
    upper_b = np.quantile(a=total_outputs, q=0.75, axis=0)
    var_new = (upper_b - lower_b)
    return var_y, var_new


def unnormalize_bi(outputs):
    """Unnormalize relative bi of a numpy array"""
    outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
    return outputs

monoloco/utils/nuscenes.py
@@ -2,6 +1,7 @@
 import random
 import json
 import os
 import numpy as np

monoloco/utils/pifpaf.py (deleted file)
@@ -1,54 +0,0 @@
import numpy as np


def preprocess_pif(annotations, im_size=None):
    """
    Preprocess pif annotations:
    1. enlarge the box of 10%
    2. Constraint it inside the image (if image_size provided)
    """
    boxes = []
    keypoints = []
    for dic in annotations:
        box = dic['bbox']
        if box[3] < 0.5:  # Check for no detections (boxes 0,0,0,0)
            return [], []
        kps = prepare_pif_kps(dic['keypoints'])
        conf = float(np.sort(np.array(kps[2]))[-3])  # The confidence is the 3rd highest value for the keypoints

        # Add 15% for y and 20% for x
        delta_h = (box[3] - box[1]) / 7
        delta_w = (box[2] - box[0]) / 3.5
        assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
        box[0] -= delta_w
        box[1] -= delta_h
        box[2] += delta_w
        box[3] += delta_h

        # Put the box inside the image
        if im_size is not None:
            box[0] = max(0, box[0])
            box[1] = max(0, box[1])
            box[2] = min(box[2], im_size[0])
            box[3] = min(box[3], im_size[1])

        box.append(conf)
        boxes.append(box)
        keypoints.append(kps)
    return boxes, keypoints


def prepare_pif_kps(kps_in):
    """Convert from a list of 51 to a list of 3, 17"""
    assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
    xxs = kps_in[0:][::3]
    yys = kps_in[1:][::3]  # from offset 1 every 3
    ccs = kps_in[2:][::3]
    return [xxs, yys, ccs]

monoloco/utils/stereo.py
@@ -49,7 +49,7 @@ def filter_disparities(kps, kps_right_list, idx, expected_disps):
     disparity_y_conf = np.where(mask_conf, disparity_y, np.nan)
     # Mask outliers using iqr
-    mask_outlier = get_iqr_mask(disparity_x_conf)
+    mask_outlier = interquartile_mask(disparity_x_conf)
     disparity_x_mask = np.where(mask_outlier, disparity_x_conf, np.nan)
     disparity_y_mask = np.where(mask_outlier, disparity_y_conf, np.nan)
     avg_disparity_x = np.nanmedian(disparity_x_mask, axis=1)  # ignore the nan
@@ -79,7 +79,7 @@ def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y):
         return False
-def get_iqr_mask(distribution):
+def interquartile_mask(distribution):
     quartile_1, quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1)
     iqr = quartile_3 - quartile_1
     lower_bound = quartile_1 - (iqr * 1.5)
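
The diff context cuts off before the upper bound, but the renamed helper is the standard 1.5 x IQR (Tukey) fence. A self-contained sketch of that idea, assuming per-row statistics as in the call sites above:

```python
import numpy as np

def interquartile_mask(distribution):
    """True where a value lies inside the 1.5 * IQR fence of its row."""
    quartile_1, quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1)
    iqr = quartile_3 - quartile_1
    lower_bound = (quartile_1 - iqr * 1.5)[:, np.newaxis]
    upper_bound = (quartile_3 + iqr * 1.5)[:, np.newaxis]
    return (distribution >= lower_bound) & (distribution <= upper_bound)
```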

monoloco/visuals/__init__.py (new file)
@@ -0,0 +1,3 @@
from .printer import Printer
from .results import print_results

@@ -1,11 +1,12 @@
 # pylint: skip-file
-import numpy as np
-import os
 import math
+import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.patches import Ellipse
-from visuals.printer import get_angle
+from .printer import get_angle
 def paper():
@@ -112,30 +113,24 @@ def paper():
     plt.close()
 def target_error(xx, mm):
     return mm * xx
 def gmm():
     mu_men = 178
     std_men = 7
     mu_women = 165
     std_women = 7
-    N_men_1 = np.random.normal(mu_men, std_men, 1000000)
-    N_men_2 = np.random.normal(mu_men, std_men, 1000000)
-    N_women_1 = np.random.normal(mu_women, std_women, 1000000)
-    N_women_2 = np.random.normal(mu_women, std_women, 1000000)
-    N_gmm_1 = np.concatenate((N_men_1, N_women_1))
-    N_gmm_2 = np.concatenate((N_men_2, N_women_2))
-    mu_gmm_1 = np.mean(N_gmm_1)
-    mu_gmm_2 = np.mean(N_gmm_2)
-    std_gmm = np.std(N_gmm_1)
-    mm_gender = std_gmm / mu_gmm_1
-    var_gmm = np.var(N_gmm_1)
-    abs_diff_1 = np.abs(mu_gmm_1 - N_gmm_1)
-    abs_diff_2 = np.mean(np.abs(N_gmm_1 - N_gmm_2))
-    mean_deviation_1 = np.mean(abs_diff_1)
-    mean_deviation_2 = np.mean(abs_diff_2)
+    N_men = np.random.normal(mu_men, std_men, 10000000)
+    N_women = np.random.normal(mu_women, std_women, 10000000)
+    N_gmm = np.concatenate((N_men, N_women))
+    perc, _ = np.nanpercentile(N_gmm, [18.5, 81.5])  # Laplace bi => 63%
+    mu_gmm = np.mean(N_gmm)
+    bi_gmm = mu_gmm - perc
+    abs_diff = np.abs(mu_gmm - N_gmm)
+    mean_deviation = np.mean(abs_diff)
     # sns.distplot(N_men, hist=False, rug=False, label="Men")
     # sns.distplot(N_women, hist=False, rug=False, label="Women")
     # sns.distplot(N_gmm, hist=False, rug=False, label="GMM")
@@ -143,15 +138,10 @@ def gmm():
     # plt.ylabel("Height distributions of men and women")
     # plt.legend()
     # plt.show()
-    print("Mean of GMM distribution: {:.2f}".format(mu_gmm_1))
-    print("Standard deviation: {:.2f}".format(std_gmm))
-    print("Relative error (standard deviation) {:.3f} %".format(mm_gender * 100))
-    print("Variance: {:.2f}".format(var_gmm))
-    print("Mean deviation: {:.2f}".format(mean_deviation_1))
-    print("Mean deviation 2: {:.2f}".format(mean_deviation_2))
-    print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation_1 / mu_gmm_1) * 100))
-    return mm_gender
+    print("Mean of GMM distribution: {:.2f}".format(mu_gmm))
+    print("+- bi interval (63%) : {:.2f}".format(bi_gmm))
+    print("Mean deviation: {:.2f}".format(mean_deviation))
+    print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation / mu_gmm) * 100))
 def get_confidence(xx, zz, std):
@@ -160,4 +150,4 @@ def get_confidence(xx, zz, std):
     delta_x = std * math.cos(theta)
     delta_z = std * math.sin(theta)
     return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z)
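
The 18.5/81.5 percentiles encode a Laplace-style interval: for Laplace(mu, b), P(|X - mu| <= b) = 1 - e^-1 ≈ 0.632, so mu ± b covers roughly 63% of the mass (the exact percentiles are 18.4/81.6; the code rounds to 18.5/81.5). A quick numeric check of the correspondence:

```python
import numpy as np

# For a unit Laplace, the 18.4th and 81.6th percentiles sit near -b and +b
samples = np.random.laplace(loc=0.0, scale=1.0, size=1_000_000)
lo, hi = np.percentile(samples, [18.4, 81.6])
print(lo, hi)  # both close to -1.0 and +1.0
```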

monoloco/visuals/printer.py
@@ -9,8 +9,7 @@ import matplotlib.cm as cm
 from matplotlib.patches import Ellipse, Circle, Rectangle
 from mpl_toolkits.axes_grid1 import make_axes_locatable
-from ..utils.camera import pixel_to_camera
-from ..utils.misc import get_task_error
+from ..utils import pixel_to_camera, get_task_error
 class Printer:

monoloco/visuals/results.py
@@ -1,6 +1,7 @@
 # pylint: disable=R0915
 import os
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.patches import Ellipse

monoloco/visuals/webcam.py
@@ -12,14 +12,11 @@ import torch
 import matplotlib.pyplot as plt
 from PIL import Image
 from openpifpaf import transforms
 import cv2
-from ..visuals.printer import Printer
-from ..utils.pifpaf import preprocess_pif
-from ..predict.pifpaf import PifPaf
-from ..predict.network import MonoLoco
-from ..predict.factory import factory_for_gt
+from ..visuals import Printer
+from ..network import PifPaf, MonoLoco
+from ..network.process import preprocess_pifpaf, factory_for_gt
 def webcam(args):
@@ -66,7 +63,7 @@ def webcam(args):
     visualizer_monoloco.send(None)
     if pifpaf_out:
-        boxes, keypoints = preprocess_pif(pifpaf_out, (width, height))
+        boxes, keypoints = preprocess_pifpaf(pifpaf_out, (width, height))
         outputs, varss = monoloco.forward(keypoints, kk)
         dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt)
         visualizer_monoloco.send((pil_image, dic_out))

setup.py
@@ -10,9 +10,9 @@ setup(
     version=VERSION,
     packages=[
         'monoloco',
-        'monoloco.train',
-        'monoloco.predict',
+        'monoloco.network',
         'monoloco.eval',
+        'monoloco.train',
         'monoloco.prep',
         'monoloco.visuals',
         'monoloco.utils'
@@ -28,9 +28,15 @@ setup(
     install_requires=[
         'openpifpaf',
-        'nuscenes-devkit',  # for nuScenes dataset preprocessing
         'tabulate',  # For evaluation
-        'pylint',
-        'pytest',
     ],
+    extras_require={
+        'test': [
+            'pylint',
+            'pytest',
+        ],
+        'prep': [
+            'nuscenes-devkit',
+        ],
+    },
 )
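
With these extras in place, `pip3 install '.[test]'` pulls in pylint and pytest (matching the updated .travis.yml above), while `'.[prep]'` adds nuscenes-devkit for dataset preprocessing, as the README section now notes.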

@@ -6,7 +6,7 @@ sys.path.insert(0, os.path.join('..', 'monoloco'))
 def test_iou():
-    from monoloco.utils.iou import get_iou_matrix
+    from monoloco.utils import get_iou_matrix
     boxes_pred = [[1, 100, 1, 200]]
     boxes_gt = [[100., 120., 150., 160.], [12, 110, 130., 160.]]
     iou_matrix = get_iou_matrix(boxes_pred, boxes_gt)
@@ -14,7 +14,7 @@ def test_iou():
 def test_pixel_to_camera():
-    from monoloco.utils.camera import pixel_to_camera
+    from monoloco.utils import pixel_to_camera
     kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]
     zz = 10
     uv_vector = [1000., 400.]