refactor (#8)

* Make imports from __init__ files

* add to __init__ files only classes or utils functions

* refactor packages

* fix pylint cyclic import

* add task error with 63% confidence intervals and MAD

* fix pixel_error

* update setup

* update installation instructions

* update instructions

* update instructions

* update package installation
Lorenzo Bertoni 2019-07-23 15:55:46 +02:00 committed by GitHub
parent 235a90ef47
commit 8366a436ee
36 changed files with 401 additions and 398 deletions


@ -4,10 +4,8 @@ python:
- "3.6"
- "3.7"
install:
- pip install openpifpaf
- pip install nuscenes-devkit
- pip install tabulate
- pip install pylint
- pip install --upgrade pip setuptools
- pip install ".[test]"
script:
- pylint monoloco --disable=unused-variable,fixme
- pytest -vv


@ -29,14 +29,14 @@ A video with qualitative results is available on [YouTube](https://www.youtube.c
Python 3 is required. Python 2 is not supported.
Do not clone this repository and make sure there is no folder named monoloco in your current directory.
`pip install monoloco`
`pip3 install monoloco`
A live demo is available; we recommend trying our **Webcam** functionality. More info is in the webcam section.
For development of the monoloco source code itself, you need to clone this repository and then:
```
pip install openpifpaf nuscenes-devkit tabulate
pip3 install -e '.[test, prep]'
```
Python 3.6 or 3.7 is required for the nuScenes development kit. Python 3 is required for openpifpaf.
All details for the PifPaf pose detector are at [openpifpaf](https://github.com/vita-epfl/openpifpaf).
@ -135,6 +135,7 @@ Multiple visualizations can be combined in different windows.
The above GIF was obtained by running the following command on a MacBook:
`pip3 install opencv-python`
`python3 -m monoloco.run predict --webcam --scale 0.2 --output_types combined --z_max 10 --checkpoint resnet50`
# Preprocess
@ -152,6 +153,8 @@ data/kitti/images`
Download nuScenes dataset from [nuScenes](https://www.nuscenes.org/download) (either Mini or TrainVal),
save it anywhere and soft link it in `data/nuscenes`
nuScenes preprocessing requires `pip3 install nuscenes-devkit`
### Annotations to preprocess
MonoLoco is trained using 2D human pose joints. To create them, run PifPaf over the KITTI or nuScenes training images.


@ -1,4 +1,4 @@
"""Open implementation of MonoLoco."""
__version__ = '0.4.1'
__version__ = '0.4.2'


@ -0,0 +1,4 @@
from .eval_kitti import EvalKitti
from .generate_kitti import GenerateKitti
from .geom_baseline import geometric_baseline


@ -13,10 +13,9 @@ from itertools import chain
from tabulate import tabulate
from ..utils.iou import get_iou_matches
from ..utils.misc import get_task_error, get_pixel_error
from ..utils.kitti import check_conditions, get_category, split_training, parse_ground_truth
from ..visuals.results import print_results
from ..utils import get_iou_matches, get_task_error, get_pixel_error, check_conditions, get_category, split_training, \
parse_ground_truth
from ..visuals import print_results
class EvalKitti:


@ -13,12 +13,10 @@ import copy
import numpy as np
import torch
from ..predict.network import MonoLoco
from ..network import MonoLoco
from ..network.process import preprocess_pifpaf
from ..eval.geom_baseline import compute_distance
from ..utils.kitti import get_calibration
from ..utils.pifpaf import preprocess_pif
from ..utils.camera import xyz_from_distance, get_keypoints, pixel_to_camera
from ..utils.stereo import depth_from_disparity
from ..utils import get_keypoints, pixel_to_camera, xyz_from_distance, get_calibration, depth_from_disparity
class GenerateKitti:
@ -51,7 +49,7 @@ class GenerateKitti:
for basename in self.list_basename:
path_calib = os.path.join(self.dir_kk, basename + '.txt')
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374))
if not keypoints:
cnt_no_file += 1
@ -95,7 +93,7 @@ class GenerateKitti:
for mode in ['left', 'right']:
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename, mode=mode)
boxes, keypoints = preprocess_pif(annotations, im_size=(1242, 374))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1242, 374))
if not keypoints and mode == 'left':
cnt_no_file += 1


@ -6,7 +6,7 @@ from collections import defaultdict
import numpy as np
from ..utils.camera import pixel_to_camera, get_keypoints
from ..utils import pixel_to_camera, get_keypoints
AVERAGE_Y = 0.48
CLUSTERS = ['10', '20', '30', 'all']


@ -0,0 +1,4 @@
from .pifpaf import PifPaf, ImageList
from .losses import LaplacianLoss
from .net import MonoLoco


@ -1,3 +1,4 @@
import math
import torch
import numpy as np


@ -1,6 +1,6 @@
"""
Monoloco predictor. It receives pifpaf joints and outputs distances
Monoloco class. From 2D joints to real-world distances
"""
import logging
@ -8,10 +8,9 @@ from collections import defaultdict
import torch
from ..utils.iou import get_iou_matches, reorder_matches
from ..utils.camera import get_keypoints, pixel_to_camera, xyz_from_distance
from ..utils.network import get_monoloco_inputs, unnormalize_bi, laplace_sampling
from ..train.architectures import LinearModel
from ..utils import get_iou_matches, reorder_matches, get_keypoints, pixel_to_camera, xyz_from_distance
from .process import preprocess_monoloco, unnormalize_bi, laplace_sampling
from .architectures import LinearModel
class MonoLoco:
@ -43,7 +42,7 @@ class MonoLoco:
return None, None
with torch.no_grad():
inputs = get_monoloco_inputs(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device))
inputs = preprocess_monoloco(torch.tensor(keypoints).to(self.device), torch.tensor(kk).to(self.device))
if self.n_dropout > 0:
self.model.dropout.training = True # Manually reactivate dropout in eval
total_outputs = torch.empty((0, inputs.size()[0])).to(self.device)

monoloco/network/process.py (new file, 154 lines)

@ -0,0 +1,154 @@
import json
import numpy as np
import torch
from ..utils import get_keypoints, pixel_to_camera
def preprocess_monoloco(keypoints, kk):
""" Preprocess batches of inputs
keypoints = torch tensors of (m, 3, 17) or list [3,17]
Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box
"""
if isinstance(keypoints, list):
keypoints = torch.tensor(keypoints)
if isinstance(kk, list):
kk = torch.tensor(kk)
# Projection in normalized image coordinates and zero-center with the center of the bounding box
uv_center = get_keypoints(keypoints, mode='center')
xy1_center = pixel_to_camera(uv_center, kk, 10)
xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
# xy1_center[:, 1].fill_(0) #TODO
kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3)
kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view
return kps_out
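As a quick illustration of the normalization above — a sketch, assuming the package is installed; the keypoint values are invented:
```
import torch
from monoloco.network.process import preprocess_monoloco

kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # KITTI intrinsics
keypoints = torch.ones(2, 3, 17)   # 2 people, (u, v, conf) x 17 COCO joints
inputs = preprocess_monoloco(keypoints, kk)
print(inputs.shape)                # torch.Size([2, 34]): zero-centered (x, y) pairs
```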
def factory_for_gt(im_size, name=None, path_gt=None):
"""Look for ground-truth annotations file and define calibration matrix based on image size """
try:
with open(path_gt, 'r') as f:
dic_names = json.load(f)
print('-' * 120 + "\nGround-truth file opened")
except (FileNotFoundError, TypeError):
print('-' * 120 + "\nGround-truth file not found")
dic_names = {}
try:
kk = dic_names[name]['K']
dic_gt = dic_names[name]
print("Matched ground-truth file!")
except KeyError:
dic_gt = None
x_factor = im_size[0] / 1600
y_factor = im_size[1] / 900
pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it
if im_size[0] / im_size[1] > 2.5:
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
else:
kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
[0, 1266.4 * pixel_factor, 491.5 * y_factor],
[0., 0., 1.]] # nuScenes calibration
print("Using a standard calibration matrix...")
return kk, dic_gt
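For example, a hypothetical call with no ground-truth file on disk:
```
from monoloco.network.process import factory_for_gt

kk, dic_gt = factory_for_gt(im_size=(1242, 374), name='000001.png', path_gt=None)
# prints "Ground-truth file not found"; since 1242/374 > 2.5 the KITTI
# calibration matrix is returned and dic_gt is None
```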
def laplace_sampling(outputs, n_samples):
# np.random.seed(1)
mu = outputs[:, 0]
bi = torch.abs(outputs[:, 1])
# Analytical
# uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
# xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))
# Sampling
cuda_check = outputs.is_cuda
if cuda_check:
get_device = outputs.get_device()
device = torch.device(type="cuda", index=get_device)
else:
device = torch.device("cpu")
laplace = torch.distributions.Laplace(mu, bi)
xx = laplace.sample((n_samples,)).to(device)
return xx
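A minimal sampling sketch — the (mu, bi) pairs below are made up:
```
import torch
from monoloco.network.process import laplace_sampling

outputs = torch.tensor([[20.0, 1.5], [9.0, 0.6]])  # per-detection (mu, bi)
samples = laplace_sampling(outputs, n_samples=100)
print(samples.shape)                               # torch.Size([100, 2])
```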
def epistemic_variance(total_outputs):
"""Compute epistemic variance"""
# var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2
var_y = np.var(total_outputs, axis=0)
lower_b = np.quantile(a=total_outputs, q=0.25, axis=0)
upper_b = np.quantile(a=total_outputs, q=0.75, axis=0)
var_new = (upper_b - lower_b)
return var_y, var_new
def unnormalize_bi(outputs):
"""Unnormalize relative bi of a nunmpy array"""
outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
return outputs
def preprocess_pifpaf(annotations, im_size=None):
"""
Preprocess pif annotations:
1. enlarge the box of 10%
2. Constraint it inside the image (if image_size provided)
"""
boxes = []
keypoints = []
for dic in annotations:
box = dic['bbox']
if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0)
return [], []
kps = prepare_pif_kps(dic['keypoints'])
conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints
# Add 15% for y and 20% for x
delta_h = (box[3] - box[1]) / 7
delta_w = (box[2] - box[0]) / 3.5
assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
box[0] -= delta_w
box[1] -= delta_h
box[2] += delta_w
box[3] += delta_h
# Put the box inside the image
if im_size is not None:
box[0] = max(0, box[0])
box[1] = max(0, box[1])
box[2] = min(box[2], im_size[0])
box[3] = min(box[3], im_size[1])
box.append(conf)
boxes.append(box)
keypoints.append(kps)
return boxes, keypoints
def prepare_pif_kps(kps_in):
"""Convert from a list of 51 to a list of 3, 17"""
assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
xxs = kps_in[0:][::3]
yys = kps_in[1:][::3] # from offset 1 every 3
ccs = kps_in[2:][::3]
return [xxs, yys, ccs]
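Putting the helpers of this new module together, a toy end-to-end run (annotation values invented, package assumed installed):
```
from monoloco.network.process import preprocess_pifpaf, preprocess_monoloco

annotation = {'bbox': [100., 50., 200., 350.],      # x1, y1, x2, y2
              'keypoints': [150., 60., 0.9] * 17}   # flat 51-value COCO list
boxes, keypoints = preprocess_pifpaf([annotation], im_size=(1242, 374))
# boxes[0]: enlarged, clipped to the image, confidence appended as 5th entry
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]
inputs = preprocess_monoloco(keypoints, kk)         # (1, 34) network input
```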

monoloco/predict.py (new file, 123 lines)

@ -0,0 +1,123 @@
import os
import json
import torch
from PIL import Image
from openpifpaf import show
from .visuals.printer import Printer
from .network import PifPaf, ImageList, MonoLoco
from .network.process import factory_for_gt, preprocess_pifpaf
def predict(args):
cnt = 0
# load pifpaf and monoloco models
pifpaf = PifPaf(args)
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
# data
data = ImageList(args.images, scale=args.scale)
data_loader = torch.utils.data.DataLoader(
data, batch_size=1, shuffle=False,
pin_memory=args.pin_memory, num_workers=args.loader_workers)
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
images = image_tensors.permute(0, 2, 3, 1)
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
fields_batch = pifpaf.fields(processed_images)
# unbatch
for image_path, image, processed_image_cpu, fields in zip(
image_paths, images, processed_images_cpu, fields_batch):
if args.output_directory is None:
output_path = image_path
else:
file_name = os.path.basename(image_path)
output_path = os.path.join(args.output_directory, file_name)
print('image', idx, image_path, output_path)
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
if 'monoloco' in args.networks:
im_size = (float(image.size()[1] / args.scale),
float(image.size()[0] / args.scale)) # Width, Height (original)
# Extract calibration matrix and ground truth file if present
with open(image_path, 'rb') as f:
pil_image = Image.open(f).convert('RGB')
images_outputs.append(pil_image)
im_name = os.path.basename(image_path)
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
# Preprocess pifpaf outputs and run monoloco
boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size)
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt)
else:
dic_out = None
kk = None
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
print('Image {}\n'.format(cnt) + '-' * 120)
cnt += 1
def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
"""Output json files or images according to the choice"""
# Save json file
if 'pifpaf' in args.networks:
keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
# Visualizer
keypoint_painter = show.KeypointPainter(show_box=False)
skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
markersize=1, linewidth=4)
if 'json' in args.output_types and keypoint_sets.size > 0:
with open(output_path + '.pifpaf.json', 'w') as f:
json.dump(pifpaf_out, f)
if 'keypoints' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.keypoints.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
keypoint_painter.keypoints(ax, keypoint_sets)
if 'skeleton' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.skeleton.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
if 'monoloco' in args.networks:
if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
epistemic = False
if args.n_dropout > 0:
epistemic = True
if dic_out['boxes']: # Only print in case of detections
printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types
, z_max=args.z_max, epistemic=epistemic)
figures, axes = printer.factory_axes()
printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box,
save=True, show=args.show)
if 'json' in args.output_types:
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
json.dump(dic_out, ff)


@ -1,87 +0,0 @@
import json
import os
from openpifpaf import show
from ..visuals.printer import Printer
def factory_for_gt(im_size, name=None, path_gt=None):
"""Look for ground-truth annotations file and define calibration matrix based on image size """
try:
with open(path_gt, 'r') as f:
dic_names = json.load(f)
print('-' * 120 + "\nGround-truth file opened")
except (FileNotFoundError, TypeError):
print('-' * 120 + "\nGround-truth file not found")
dic_names = {}
try:
kk = dic_names[name]['K']
dic_gt = dic_names[name]
print("Matched ground-truth file!")
except KeyError:
dic_gt = None
x_factor = im_size[0] / 1600
y_factor = im_size[1] / 900
pixel_factor = (x_factor + y_factor) / 2 # TODO remove and check it
if im_size[0] / im_size[1] > 2.5:
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
else:
kk = [[1266.4 * pixel_factor, 0., 816.27 * x_factor],
[0, 1266.4 * pixel_factor, 491.5 * y_factor],
[0., 0., 1.]] # nuScenes calibration
print("Using a standard calibration matrix...")
return kk, dic_gt
def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
"""Output json files or images according to the choice"""
# Save json file
if 'pifpaf' in args.networks:
keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
# Visualizer
keypoint_painter = show.KeypointPainter(show_box=False)
skeleton_painter = show.KeypointPainter(show_box=False, color_connections=True,
markersize=1, linewidth=4)
if 'json' in args.output_types and keypoint_sets.size > 0:
with open(output_path + '.pifpaf.json', 'w') as f:
json.dump(pifpaf_out, f)
if 'keypoints' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.keypoints.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
keypoint_painter.keypoints(ax, keypoint_sets)
if 'skeleton' in args.output_types:
with show.image_canvas(images_outputs[0],
output_path + '.skeleton.png',
show=args.show,
fig_width=args.figure_width,
dpi_factor=args.dpi_factor) as ax:
skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
if 'monoloco' in args.networks:
if any((xx in args.output_types for xx in ['front', 'bird', 'combined'])):
epistemic = False
if args.n_dropout > 0:
epistemic = True
if dic_out['boxes']: # Only print in case of detections
printer = Printer(images_outputs[1], output_path, kk, output_types=args.output_types
, z_max=args.z_max, epistemic=epistemic)
figures, axes = printer.factory_axes()
printer.draw(figures, axes, dic_out, images_outputs[1], draw_box=args.draw_box,
save=True, show=args.show)
if 'json' in args.output_types:
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
json.dump(dic_out, ff)


@ -1,72 +0,0 @@
import os
from PIL import Image
import torch
from ..predict.pifpaf import PifPaf, ImageList
from ..predict.network import MonoLoco
from ..predict.factory import factory_for_gt, factory_outputs
from ..utils.pifpaf import preprocess_pif
def predict(args):
cnt = 0
# load pifpaf and monoloco models
pifpaf = PifPaf(args)
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
# data
data = ImageList(args.images, scale=args.scale)
data_loader = torch.utils.data.DataLoader(
data, batch_size=1, shuffle=False,
pin_memory=args.pin_memory, num_workers=args.loader_workers)
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
images = image_tensors.permute(0, 2, 3, 1)
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
fields_batch = pifpaf.fields(processed_images)
# unbatch
for image_path, image, processed_image_cpu, fields in zip(
image_paths, images, processed_images_cpu, fields_batch):
if args.output_directory is None:
output_path = image_path
else:
file_name = os.path.basename(image_path)
output_path = os.path.join(args.output_directory, file_name)
print('image', idx, image_path, output_path)
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
if 'monoloco' in args.networks:
im_size = (float(image.size()[1] / args.scale),
float(image.size()[0] / args.scale)) # Width, Height (original)
# Extract calibration matrix and ground truth file if present
with open(image_path, 'rb') as f:
pil_image = Image.open(f).convert('RGB')
images_outputs.append(pil_image)
im_name = os.path.basename(image_path)
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
# Preprocess pifpaf outputs and run monoloco
boxes, keypoints = preprocess_pif(pifpaf_out, im_size)
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dic_gt)
else:
dic_out = None
kk = None
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
print('Image {}\n'.format(cnt) + '-' * 120)
cnt += 1


@ -0,0 +1,3 @@
from .preprocess_nu import PreprocessNuscenes
from .preprocess_ki import PreprocessKitti


@ -8,12 +8,9 @@ from collections import defaultdict
import json
import datetime
from ..prep.transforms import transform_keypoints
from ..utils.kitti import get_calibration, split_training, parse_ground_truth
from ..utils.network import get_monoloco_inputs
from ..utils.pifpaf import preprocess_pif
from ..utils.iou import get_iou_matches
from ..utils.misc import append_cluster
from .transforms import transform_keypoints
from ..utils import get_calibration, split_training, parse_ground_truth, get_iou_matches, append_cluster
from ..network.process import preprocess_pifpaf, preprocess_monoloco
class PreprocessKitti:
@ -84,10 +81,10 @@ class PreprocessKitti:
try:
with open(os.path.join(self.dir_ann, basename + '.png.pifpaf.json'), 'r') as f:
annotations = json.load(f)
boxes, keypoints = preprocess_pif(annotations, im_size=(1238, 374))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1238, 374))
keypoints_hflip = transform_keypoints(keypoints, mode='flip')
inputs = get_monoloco_inputs(keypoints, kk).tolist()
inputs_hflip = get_monoloco_inputs(keypoints, kk).tolist()
inputs = preprocess_monoloco(keypoints, kk).tolist()
inputs_hflip = preprocess_monoloco(keypoints, kk).tolist()
all_keypoints = [keypoints, keypoints_hflip]
all_inputs = [inputs, inputs_hflip]


@ -10,16 +10,11 @@ from collections import defaultdict
import datetime
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from ..utils.iou import get_iou_matches
from ..utils.misc import append_cluster
from ..utils.nuscenes import select_categories
from ..utils.camera import project_3d
from ..utils.pifpaf import preprocess_pif
from ..utils.network import get_monoloco_inputs
from ..utils import get_iou_matches, append_cluster, select_categories, project_3d
from ..network.process import preprocess_pifpaf, preprocess_monoloco
class PreprocessNuscenes:
@ -97,12 +92,12 @@ class PreprocessNuscenes:
if exists:
with open(path_pif, 'r') as file:
annotations = json.load(file)
boxes, keypoints = preprocess_pif(annotations, im_size=(1600, 900))
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
else:
continue
if keypoints:
inputs = get_monoloco_inputs(keypoints, kk).tolist()
inputs = preprocess_monoloco(keypoints, kk).tolist()
matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
for (idx, idx_gt) in matches:


@ -1,20 +1,11 @@
# pylint: skip-file
import argparse
from openpifpaf.network import nets
from openpifpaf import decoder
from .prep.preprocess_nu import PreprocessNuscenes
from .prep.preprocess_ki import PreprocessKitti
from .predict.predict import predict
from .train.trainer import Trainer
from .eval.generate_kitti import GenerateKitti
from .eval.geom_baseline import geometric_baseline
from .train.hyp_tuning import HypTuning
from .eval.eval_kitti import EvalKitti
from .visuals.webcam import webcam
def cli():
@ -105,28 +96,33 @@ def cli():
def main():
args = cli()
if args.command == 'predict':
if args.webcam:
from .visuals.webcam import webcam
webcam(args)
else:
from .predict import predict
predict(args)
elif args.command == 'prep':
if 'nuscenes' in args.dataset:
from .prep import PreprocessNuscenes
prep = PreprocessNuscenes(args.dir_ann, args.dir_nuscenes, args.dataset, args.iou_min)
prep.run()
if 'kitti' in args.dataset:
from .prep import PreprocessKitti
prep = PreprocessKitti(args.dir_ann, args.iou_min)
prep.run()
elif args.command == 'train':
from .train import HypTuning
if args.hyp:
hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs,
baseline=args.baseline, dropout=args.dropout,
multiplier=args.multiplier, r_seed=args.r_seed)
hyp_tuning.train()
else:
from .train import Trainer
training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs,
baseline=args.baseline, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step,
n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size,
@ -137,20 +133,24 @@ def main():
elif args.command == 'eval':
if args.geometric:
from .eval import geometric_baseline
geometric_baseline(args.joints)
if args.generate:
from .eval import GenerateKitti
kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout)
kitti_txt.run_mono()
if args.stereo:
kitti_txt.run_stereo()
if args.dataset == 'kitti':
from .eval import EvalKitti
kitti_eval = EvalKitti(verbose=args.verbose, stereo=args.stereo)
kitti_eval.run()
kitti_eval.printer(show=args.show)
if 'nuscenes' in args.dataset:
from .train import Trainer
training = Trainer(joints=args.joints)
_ = training.evaluate(load=True, model=args.model, debug=False)
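Note the pattern in the dispatch above: every command branch imports its own module locally, which is what resolves the pylint cyclic-import warning from the commit message. Distilled into a standalone sketch (module names hypothetical):
```
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('command', choices=['predict', 'train'])
    args = parser.parse_args()
    if args.command == 'predict':
        import predict            # hypothetical module, imported only when
        predict.run(args)         # this branch runs: no cycle at load time
    elif args.command == 'train':
        import train
        train.run(args)

if __name__ == '__main__':
    main()
```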


@ -0,0 +1,3 @@
from .hyp_tuning import HypTuning
from .trainer import Trainer


@ -1,4 +1,4 @@
# pylint: skip-file # TODO
# pylint: skip-file # TODO make train file and class trainer and
"""
Training and evaluation of a neural network which predicts 3D localization and confidence intervals
@ -19,11 +19,11 @@ import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from .datasets import KeypointsDataset
from .architectures import LinearModel
from .losses import LaplacianLoss
from ..utils.logs import set_logger
from ..utils.network import laplace_sampling, unnormalize_bi
from train.datasets import KeypointsDataset
from ..network import LaplacianLoss
from ..network.process import laplace_sampling, unnormalize_bi
from ..network.architectures import LinearModel
from ..utils import set_logger
class Trainer:


@ -0,0 +1,8 @@
from .iou import get_iou_matches, reorder_matches, get_iou_matrix
from .misc import get_task_error, get_pixel_error, append_cluster
from .kitti import check_conditions, get_category, split_training, parse_ground_truth, get_calibration
from .camera import xyz_from_distance, get_keypoints, pixel_to_camera, project_3d
from .logs import set_logger
from .stereo import depth_from_disparity
from ..utils.nuscenes import select_categories


@ -1,6 +1,4 @@
import random
def append_cluster(dic_jo, phase, xx, dd, kps):
"""Append the annotation based on its distance"""
@ -27,20 +25,19 @@ def append_cluster(dic_jo, phase, xx, dd, kps):
def get_task_error(dd, mode='std'):
"""Get target error not knowing the gender"""
"""Get target error not knowing the gender, modeled through a Gaussian Mixure model"""
assert mode in ('std', 'mad')
h_mean = 171.5 # average h of the human distribution
if mode == 'std':
mm_gender = 0.0557
elif mode == 'mad': # mean absolute deviation
mm_gender = 0.0457
return mm_gender * dd
delta_h = 9.07 # delta h for 63% confidence interval
elif mode == 'mad':
delta_h = 7.83 # delta_h of mean absolute deviation
return dd * (1 - h_mean / (h_mean + delta_h))
def get_pixel_error(dd_gt, zz_gt):
"""calculate error in stereo distance due to +-1 pixel mismatch (function of depth)"""
"""calculate error in stereo distance due to 1 pixel mismatch (function of depth)"""
disp = 0.54 * 721 / zz_gt
random.seed(1)
sign = random.choice((-1, 1))
delta_z = zz_gt - 0.54 * 721 / (disp + sign)
delta_z = zz_gt - 0.54 * 721 / (disp - 1)
return dd_gt + delta_z
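As a sanity check (not part of the commit), plugging in 20 m: the 63% task-error interval is about ±1 m, and a 1-pixel disparity slip moves the stereo depth by a similar amount:
```
h_mean, delta_h = 171.5, 9.07                      # cm, values from get_task_error
dd = zz_gt = 20.0
task_err = dd * (1 - h_mean / (h_mean + delta_h))  # ~1.00 m

disp = 0.54 * 721 / zz_gt                          # KITTI baseline (m) * focal (px)
delta_z = zz_gt - 0.54 * 721 / (disp - 1)          # ~-1.08 m
print(round(task_err, 2), round(delta_z, 2))
```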


@ -1,67 +0,0 @@
import numpy as np
import torch
from ..utils.camera import get_keypoints, pixel_to_camera
def get_monoloco_inputs(keypoints, kk):
""" Preprocess batches of inputs
keypoints = torch tensors of (m, 3, 17) or list [3,17]
Outputs = torch tensors of (m, 34) in meters normalized (z=1) and zero-centered using the center of the box
"""
if isinstance(keypoints, list):
keypoints = torch.tensor(keypoints)
if isinstance(kk, list):
kk = torch.tensor(kk)
# Projection in normalized image coordinates and zero-center with the center of the bounding box
uv_center = get_keypoints(keypoints, mode='center')
xy1_center = pixel_to_camera(uv_center, kk, 10)
xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
# xy1_center[:, 1].fill_(0) #TODO
kps_norm = xy1_all - xy1_center.unsqueeze(1) # (m, 17, 3) - (m, 1, 3)
kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1) # no contiguous for view
return kps_out
def laplace_sampling(outputs, n_samples):
# np.random.seed(1)
mu = outputs[:, 0]
bi = torch.abs(outputs[:, 1])
# Analytical
# uu = np.random.uniform(low=-0.5, high=0.5, size=mu.shape[0])
# xx = mu - bi * np.sign(uu) * np.log(1 - 2 * np.abs(uu))
# Sampling
cuda_check = outputs.is_cuda
if cuda_check:
get_device = outputs.get_device()
device = torch.device(type="cuda", index=get_device)
else:
device = torch.device("cpu")
laplace = torch.distributions.Laplace(mu, bi)
xx = laplace.sample((n_samples,)).to(device)
return xx
def epistemic_variance(total_outputs):
"""Compute epistemic variance"""
# var_y = np.sum(total_outputs**2, axis=0) / total_outputs.shape[0] - (np.mean(total_outputs, axis=0))**2
var_y = np.var(total_outputs, axis=0)
lower_b = np.quantile(a=total_outputs, q=0.25, axis=0)
upper_b = np.quantile(a=total_outputs, q=0.75, axis=0)
var_new = (upper_b - lower_b)
return var_y, var_new
def unnormalize_bi(outputs):
"""Unnormalize relative bi of a nunmpy array"""
outputs[:, 1] = torch.exp(outputs[:, 1]) * outputs[:, 0]
return outputs


@ -2,6 +2,7 @@
import random
import json
import os
import numpy as np


@ -1,54 +0,0 @@
import numpy as np
def preprocess_pif(annotations, im_size=None):
"""
Preprocess pif annotations:
1. enlarge the box of 10%
2. Constraint it inside the image (if image_size provided)
"""
boxes = []
keypoints = []
for dic in annotations:
box = dic['bbox']
if box[3] < 0.5: # Check for no detections (boxes 0,0,0,0)
return [], []
kps = prepare_pif_kps(dic['keypoints'])
conf = float(np.sort(np.array(kps[2]))[-3]) # The confidence is the 3rd highest value for the keypoints
# Add 15% for y and 20% for x
delta_h = (box[3] - box[1]) / 7
delta_w = (box[2] - box[0]) / 3.5
assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
box[0] -= delta_w
box[1] -= delta_h
box[2] += delta_w
box[3] += delta_h
# Put the box inside the image
if im_size is not None:
box[0] = max(0, box[0])
box[1] = max(0, box[1])
box[2] = min(box[2], im_size[0])
box[3] = min(box[3], im_size[1])
box.append(conf)
boxes.append(box)
keypoints.append(kps)
return boxes, keypoints
def prepare_pif_kps(kps_in):
"""Convert from a list of 51 to a list of 3, 17"""
assert len(kps_in) % 3 == 0, "keypoints expected as a multiple of 3"
xxs = kps_in[0:][::3]
yys = kps_in[1:][::3] # from offset 1 every 3
ccs = kps_in[2:][::3]
return [xxs, yys, ccs]


@ -49,7 +49,7 @@ def filter_disparities(kps, kps_right_list, idx, expected_disps):
disparity_y_conf = np.where(mask_conf, disparity_y, np.nan)
# Mask outliers using iqr
mask_outlier = get_iqr_mask(disparity_x_conf)
mask_outlier = interquartile_mask(disparity_x_conf)
disparity_x_mask = np.where(mask_outlier, disparity_x_conf, np.nan)
disparity_y_mask = np.where(mask_outlier, disparity_y_conf, np.nan)
avg_disparity_x = np.nanmedian(disparity_x_mask, axis=1) # ignore the nan
@ -79,7 +79,7 @@ def verify_stereo(zz_stereo, zz_mono, disparity_x, disparity_y):
return False
def get_iqr_mask(distribution):
def interquartile_mask(distribution):
quartile_1, quartile_3 = np.nanpercentile(distribution, [25, 75], axis=1)
iqr = quartile_3 - quartile_1
lower_bound = quartile_1 - (iqr * 1.5)


@ -0,0 +1,3 @@
from .printer import Printer
from .results import print_results


@ -1,11 +1,12 @@
# pylint: skip-file
import numpy as np
import os
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from visuals.printer import get_angle
from .printer import get_angle
def paper():
@ -112,30 +113,24 @@ def paper():
plt.close()
def target_error(xx, mm):
return mm * xx
def gmm():
mu_men = 178
std_men = 7
mu_women = 165
std_women = 7
N_men_1 = np.random.normal(mu_men, std_men, 1000000)
N_men_2 = np.random.normal(mu_men, std_men, 1000000)
N_women_1 = np.random.normal(mu_women, std_women, 1000000)
N_women_2 = np.random.normal(mu_women, std_women, 1000000)
N_gmm_1 = np.concatenate((N_men_1, N_women_1))
N_gmm_2 = np.concatenate((N_men_2, N_women_2))
mu_gmm_1 = np.mean(N_gmm_1)
mu_gmm_2 = np.mean(N_gmm_2)
std_gmm = np.std(N_gmm_1)
mm_gender = std_gmm / mu_gmm_1
var_gmm = np.var(N_gmm_1)
abs_diff_1 = np.abs(mu_gmm_1 - N_gmm_1)
abs_diff_2 = np.mean(np.abs(N_gmm_1 - N_gmm_2))
mean_deviation_1 = np.mean(abs_diff_1)
mean_deviation_2 = np.mean(abs_diff_2)
N_men = np.random.normal(mu_men, std_men, 10000000)
N_women = np.random.normal(mu_women, std_women, 10000000)
N_gmm = np.concatenate((N_men, N_women))
perc, _ = np.nanpercentile(N_gmm, [18.5, 81.5]) # Laplace bi => 63%
mu_gmm = np.mean(N_gmm)
bi_gmm = mu_gmm - perc
abs_diff = np.abs(mu_gmm - N_gmm)
mean_deviation = np.mean(abs_diff)
# sns.distplot(N_men, hist=False, rug=False, label="Men")
# sns.distplot(N_women, hist=False, rug=False, label="Women")
# sns.distplot(N_gmm, hist=False, rug=False, label="GMM")
@ -143,15 +138,10 @@ def gmm():
# plt.ylabel("Height distributions of men and women")
# plt.legend()
# plt.show()
print("Mean of GMM distribution: {:.2f}".format(mu_gmm_1))
print("Standard deviation: {:.2f}".format(std_gmm))
print("Relative error (standard deviation) {:.3f} %".format(mm_gender * 100))
print("Variance: {:.2f}".format(var_gmm))
print("Mean deviation: {:.2f}".format(mean_deviation_1))
print("Mean deviation 2: {:.2f}".format(mean_deviation_2))
print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation_1 / mu_gmm_1) * 100))
return mm_gender
print("Mean of GMM distribution: {:.2f}".format(mu_gmm))
print("+- bi interval (63%) : {:.2f}".format(bi_gmm))
print("Mean deviation: {:.2f}".format(mean_deviation))
print("Relative error (mean absolute deviation): {:.3f} %".format((mean_deviation / mu_gmm) * 100))
def get_confidence(xx, zz, std):
@ -160,4 +150,4 @@ def get_confidence(xx, zz, std):
delta_x = std * math.cos(theta)
delta_z = std * math.sin(theta)
return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z)
return (xx - delta_x, xx + delta_x), (zz - delta_z, zz + delta_z)


@ -9,8 +9,7 @@ import matplotlib.cm as cm
from matplotlib.patches import Ellipse, Circle, Rectangle
from mpl_toolkits.axes_grid1 import make_axes_locatable
from ..utils.camera import pixel_to_camera
from ..utils.misc import get_task_error
from ..utils import pixel_to_camera, get_task_error
class Printer:


@ -1,6 +1,7 @@
# pylint: disable=R0915
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse


@ -12,14 +12,11 @@ import torch
import matplotlib.pyplot as plt
from PIL import Image
from openpifpaf import transforms
import cv2
from ..visuals.printer import Printer
from ..utils.pifpaf import preprocess_pif
from ..predict.pifpaf import PifPaf
from ..predict.network import MonoLoco
from ..predict.factory import factory_for_gt
from ..visuals import Printer
from ..network import PifPaf, MonoLoco
from ..network.process import preprocess_pifpaf, factory_for_gt
def webcam(args):
@ -66,7 +63,7 @@ def webcam(args):
visualizer_monoloco.send(None)
if pifpaf_out:
boxes, keypoints = preprocess_pif(pifpaf_out, (width, height))
boxes, keypoints = preprocess_pifpaf(pifpaf_out, (width, height))
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt)
visualizer_monoloco.send((pil_image, dic_out))


@ -10,9 +10,9 @@ setup(
version=VERSION,
packages=[
'monoloco',
'monoloco.train',
'monoloco.predict',
'monoloco.network',
'monoloco.eval',
'monoloco.train',
'monoloco.prep',
'monoloco.visuals',
'monoloco.utils'
@ -28,9 +28,15 @@ setup(
install_requires=[
'openpifpaf',
'nuscenes-devkit', # for nuScenes dataset preprocessing
'tabulate', # For evaluation
'pylint',
'pytest',
],
extras_require={
'test': [
'pylint',
'pytest',
],
'prep': [
'nuscenes-devkit',
],
},
)
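With this split, the base install stays lean and the heavier dependencies become opt-in extras; these are the commands the updated README and .travis.yml rely on:
```
pip3 install monoloco              # runtime dependencies only
pip3 install '.[test]'             # adds pylint and pytest (used by CI)
pip3 install -e '.[test, prep]'    # development install, also pulls nuscenes-devkit
```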


@ -6,7 +6,7 @@ sys.path.insert(0, os.path.join('..', 'monoloco'))
def test_iou():
from monoloco.utils.iou import get_iou_matrix
from monoloco.utils import get_iou_matrix
boxes_pred = [[1, 100, 1, 200]]
boxes_gt = [[100., 120., 150., 160.],[12, 110, 130., 160.]]
iou_matrix = get_iou_matrix(boxes_pred, boxes_gt)
@ -14,7 +14,7 @@ def test_iou():
def test_pixel_to_camera():
from monoloco.utils.camera import pixel_to_camera
from monoloco.utils import pixel_to_camera
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]
zz = 10
uv_vector = [1000., 400.]