* add webcam skeleton

* fix bug

* adapt intrinsic matrix and assertion on negative boxes

* delete patches

* add support for bird view

* remove ground truth legend and remove unnecessary resizing

* add class pifpaf

* add keypoints scaling if not webcam

* add comments

* remove space

* add gif

* fix gif name

* update readme

* update readme
Lorenzo Bertoni 2019-07-03 11:39:43 +02:00 committed by GitHub
parent f0150da571
commit eae0ad5f7e
9 changed files with 265 additions and 104 deletions

README.md

@@ -67,6 +67,19 @@ To check all the commands for the parser and the subparsers run:
* `python3 src/main.py train --help`
* `python3 src/main.py eval --help`
# Webcam
<img src="docs/webcam_short.gif" height="250" alt="webcam demo" />
MonoLoco can run on personal computers without a GPU and with low-resolution images (e.g. 256x144).
It supports three types of visualization: `front`, `bird` and `combined`.
Multiple visualizations can be shown in separate windows (see the second example below).
The GIF above was obtained by running the following command on a MacBook:
`python src/main.py predict --webcam --scale 0.2 --output_types combined --z_max 10`
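For example, assuming `--output_types` accepts multiple values, the front and bird views can be opened in two separate windows with:
`python src/main.py predict --webcam --scale 0.2 --output_types front bird --z_max 10`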
# Predict
The predict script receives an image (or an entire folder using glob expressions),

Binary file not shown (before: 694 KiB)

docs/webcam_short.gif Normal file — binary file not shown (after: 1.7 MiB)

src/main.py

@@ -15,6 +15,7 @@ from eval.generate_kitti import generate_kitti
from eval.geom_baseline import geometric_baseline
from models.hyp_tuning import HypTuning
from eval.kitti_eval import KittiEval
from visuals.webcam import webcam
def cli():
@@ -57,7 +58,7 @@ def cli():
# 2) Monoloco argument
predict_parser.add_argument('--model', help='path of MonoLoco model to load',
default="data/models/best_model__seed_2_.pickle")
default="data/models/monoloco-190513-1437.pkl")
predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256)
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
default='data/arrays/names-kitti-190513-1754.json')
@@ -67,7 +68,7 @@ def cli():
predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=22)
predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
predict_parser.add_argument('--combined', help='to print combined images', action='store_true')
predict_parser.add_argument('--webcam', help='monoloco streaming', action='store_true')
# Training
training_parser.add_argument('--joints', help='Json file with input joints',
@@ -107,7 +108,10 @@ def main():
args = cli()
if args.command == 'predict':
_ = predict(args)
if args.webcam:
webcam(args)
else:
predict(args)
elif args.command == 'prep':
if 'nuscenes' in args.dataset:

src/predict/pifpaf.py Normal file (110 additions)

@@ -0,0 +1,110 @@
import glob
import numpy as np
import torchvision
import torch
from PIL import Image, ImageFile
from openpifpaf.network import nets
from openpifpaf import decoder
from openpifpaf import transforms
class ImageList(torch.utils.data.Dataset):
"""It defines transformations to apply to images and outputs of the dataloader"""
def __init__(self, image_paths, scale, image_transform=None):
self.image_paths = image_paths
self.image_transform = image_transform or transforms.image_transform # to_tensor + normalize (from pifpaf)
self.scale = scale
# data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2.0))
def __getitem__(self, index):
image_path = self.image_paths[index]
ImageFile.LOAD_TRUNCATED_IMAGES = True
with open(image_path, 'rb') as f:
image = Image.open(f).convert('RGB')
if self.scale > 1.01 or self.scale < 0.99:
image = torchvision.transforms.functional.resize(image,
(round(self.scale * image.size[1]),
round(self.scale * image.size[0])),
interpolation=Image.BICUBIC)
# PIL images are not iterables
original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1
image = self.image_transform(image)
return image_path, original_image, image
def __len__(self):
return len(self.image_paths)
def factory_from_args(args):
# Merge the model_pifpaf argument
if not args.checkpoint:
args.checkpoint = args.model_pifpaf
# glob
if not args.webcam:
if args.glob:
args.images += glob.glob(args.glob)
if not args.images:
raise Exception("no image files given")
# add args.device
args.device = torch.device('cpu')
args.pin_memory = False
if torch.cuda.is_available():
args.device = torch.device('cuda')
args.pin_memory = True
# Add num_workers
args.loader_workers = 8
# Add visualization defaults
args.figure_width = 10
args.dpi_factor = 1.0
return args
class PifPaf:
def __init__(self, args):
"""Instanciate the mdodel"""
factory_from_args(args)
model_pifpaf, _ = nets.factory_from_args(args)
model_pifpaf = model_pifpaf.to(args.device)
self.processor = decoder.factory_from_args(args, model_pifpaf)
self.keypoints_whole = []
# Scale the keypoints to the original image size for printing (if not webcam)
if not args.webcam:
self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
else:
self.scale_np = np.array([1, 1, 1] * 17).reshape(17, 3)
def fields(self, processed_images):
"""Encoder for pif and paf fields"""
fields_batch = self.processor.fields(processed_images)
return fields_batch
def forward(self, image, processed_image_cpu, fields):
"""Decoder, from pif and paf fields to keypoints"""
self.processor.set_cpu_image(image, processed_image_cpu)
keypoint_sets, scores = self.processor.keypoint_sets(fields)
if keypoint_sets.size > 0:
self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1)
.reshape(keypoint_sets.shape[0], -1).tolist())
pifpaf_out = [
{'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0],
np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]}
for kps in keypoint_sets
]
return keypoint_sets, scores, pifpaf_out
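For reference, a minimal sketch of how the new wrapper is meant to be driven, mirroring the loop in predict.py below; `args` is assumed to come from the `predict` subparser in src/main.py:

```python
# Sketch of the intended two-step API of PifPaf (mirrors predict.py below).
# Assumes `args` was parsed by the `predict` subparser in src/main.py.
pifpaf = PifPaf(args)
data = ImageList(args.images, scale=args.scale)
data_loader = torch.utils.data.DataLoader(
    data, batch_size=1, shuffle=False,
    pin_memory=args.pin_memory, num_workers=args.loader_workers)

for image_paths, image_tensors, processed_images_cpu in data_loader:
    images = image_tensors.permute(0, 2, 3, 1)  # NCHW -> NHWC for drawing
    processed_images = processed_images_cpu.to(args.device, non_blocking=True)
    fields_batch = pifpaf.fields(processed_images)  # encoder: pif and paf fields
    for image_path, image, processed_image_cpu, fields in zip(
            image_paths, images, processed_images_cpu, fields_batch):
        # decoder: fields -> keypoint sets, confidence scores, dict output
        keypoint_sets, scores, pifpaf_out = pifpaf.forward(
            image, processed_image_cpu, fields)
```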

src/predict/predict.py

@@ -1,90 +1,21 @@
import glob
import os
from PIL import Image
import numpy as np
import torchvision
import torch
from PIL import Image, ImageFile
from openpifpaf.network import nets
from openpifpaf import decoder
from openpifpaf import transforms
from predict.pifpaf import PifPaf, ImageList
from predict.monoloco import MonoLoco
from predict.factory import factory_for_gt, factory_outputs
from utils.pifpaf import preprocess_pif
class ImageList(torch.utils.data.Dataset):
"""It defines transformations to apply to images and outputs of the dataloader"""
def __init__(self, image_paths, scale, image_transform=None):
self.image_paths = image_paths
self.image_transform = image_transform or transforms.image_transform # to_tensor + normalize (from pifpaf)
self.scale = scale
# data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2.0))
def __getitem__(self, index):
image_path = self.image_paths[index]
ImageFile.LOAD_TRUNCATED_IMAGES = True
with open(image_path, 'rb') as f:
image = Image.open(f).convert('RGB')
if self.scale > 1.01 or self.scale < 0.99:
image = torchvision.transforms.functional.resize(image,
(round(self.scale * image.size[1]),
round(self.scale * image.size[0])),
interpolation=Image.BICUBIC)
# PIL images are not iterables
original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1
image = self.image_transform(image)
return image_path, original_image, image
def __len__(self):
return len(self.image_paths)
def factory_from_args(args):
# Merge the model_pifpaf argument
if not args.checkpoint:
args.checkpoint = args.model_pifpaf
# glob
if args.glob:
args.images += glob.glob(args.glob)
if not args.images:
raise Exception("no image files given")
# add args.device
args.device = torch.device('cpu')
args.pin_memory = False
if torch.cuda.is_available():
args.device = torch.device('cuda')
args.pin_memory = True
# Add num_workers
args.loader_workers = 8
# Add visualization defaults
args.figure_width = 10
args.dpi_factor = 1.0
return args
def predict(args):
cnt = 0
factory_from_args(args)
# load pifpaf model
model_pifpaf, _ = nets.factory_from_args(args)
model_pifpaf = model_pifpaf.to(args.device)
processor = decoder.factory_from_args(args, model_pifpaf)
# load monoloco
# load pifpaf and monoloco models
pifpaf = PifPaf(args)
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
# data
@@ -93,19 +24,15 @@ def predict(args):
data, batch_size=1, shuffle=False,
pin_memory=args.pin_memory, num_workers=args.loader_workers)
keypoints_whole = []
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
images = image_tensors.permute(0, 2, 3, 1)
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
fields_batch = processor.fields(processed_images)
fields_batch = pifpaf.fields(processed_images)
# unbatch
for image_path, image, processed_image_cpu, fields in zip(
image_paths,
images,
processed_images_cpu,
fields_batch):
image_paths, images, processed_images_cpu, fields_batch):
if args.output_directory is None:
output_path = image_path
@@ -114,22 +41,7 @@ def predict(args):
output_path = os.path.join(args.output_directory, file_name)
print('image', idx, image_path, output_path)
processor.set_cpu_image(image, processed_image_cpu)
keypoint_sets, scores = processor.keypoint_sets(fields)
# Correct to not change the confidence
scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
if keypoint_sets.size > 0:
keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
.reshape(keypoint_sets.shape[0], -1).tolist())
pifpaf_out = [
{'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
for kps in keypoint_sets
]
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
@@ -138,7 +50,6 @@ def predict(args):
float(image.size()[0] / args.scale)) # Width, Height (original)
# Extract calibration matrix and ground truth file if present
with open(image_path, 'rb') as f:
pil_image = Image.open(f).convert('RGB')
images_outputs.append(pil_image)
@@ -159,4 +70,3 @@ def predict(args):
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
print('Image {}\n'.format(cnt) + '-' * 120)
cnt += 1
return keypoints_whole

src/utils/pifpaf.py

@@ -24,7 +24,7 @@ def preprocess_pif(annotations, im_size=None):
# Add 10% for y
delta_h = (box[3] - box[1]) / 10
delta_w = (box[2] - box[0]) / 10
assert delta_h > 0 and delta_w > 0, "Bounding box <=0"
assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
box[0] -= delta_w
box[1] -= delta_h
box[2] += delta_w

src/visuals/printer.py

@@ -73,7 +73,8 @@ class Printer:
"combined figure cannot be print together with front or bird ones"
self.y_scale = self.width / (self.height * 1.8) # Defined proportion
self.im = self.im.resize((self.width, round(self.height * self.y_scale)))
if self.y_scale < 0.95 or self.y_scale > 1.05: # allows more variation without resizing
self.im = self.im.resize((self.width, round(self.height * self.y_scale)))
self.width = self.im.size[0]
self.height = self.im.size[1]
fig_width = self.fig_width + 0.6 * self.fig_width
@@ -178,9 +179,10 @@ class Printer:
ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1,
angle=angle, color='lightgreen', fill=True, label="Task error")
axes[1].add_patch(ellipse_real)
axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3)
if abs(self.zz_gt[idx] - self.zz_pred[idx]) > 0.001:
axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3)
# Print prediction and the real ground truth. Color of prediction depends if ground truth exists
# Print prediction and the real ground truth.
num = 0
for idx, _ in enumerate(self.xx_pred):
if self.zz_gt[idx] > 0: # only the merging ones and inside the interval

src/visuals/webcam.py Normal file (122 additions)

@@ -0,0 +1,122 @@
"""
Webcam demo application
Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/webcam.py
"""
import time
import torch
import matplotlib.pyplot as plt
from PIL import Image
from openpifpaf import transforms
import cv2
from visuals.printer import Printer
from utils.pifpaf import preprocess_pif
from predict.pifpaf import PifPaf
from predict.monoloco import MonoLoco
from predict.factory import factory_for_gt
def webcam(args):
# add args.device
args.device = torch.device('cpu')
if torch.cuda.is_available():
args.device = torch.device('cuda')
# load models
args.camera = True
pifpaf = PifPaf(args)
monoloco = MonoLoco(model_path=args.model, device=args.device)
# Start recording
cam = cv2.VideoCapture(0)
visualizer_monoloco = None
while True:
ret, frame = cam.read()
if not ret:  # check the capture before using the frame
    break
image = cv2.resize(frame, None, fx=args.scale, fy=args.scale)
height, width, _ = image.shape
print('resized image size: {}'.format(image.shape))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
processed_image_cpu = transforms.image_transform(image.copy())
processed_image = processed_image_cpu.contiguous().to(args.device, non_blocking=True)
fields = pifpaf.fields(torch.unsqueeze(processed_image, 0))[0]
_, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
key = cv2.waitKey(1)
if key % 256 == 27:
# ESC pressed
print("Escape hit, closing...")
break
pil_image = Image.fromarray(image)
intrinsic_size = [xx * 1.3 for xx in pil_image.size]
kk, dict_gt = factory_for_gt(intrinsic_size) # better intrinsics for mac camera
if visualizer_monoloco is None:
visualizer_monoloco = VisualizerMonoloco(kk, args)(pil_image)
visualizer_monoloco.send(None)
if pifpaf_out:
boxes, keypoints = preprocess_pif(pifpaf_out, (width, height))
outputs, varss = monoloco.forward(keypoints, kk)
dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt)
visualizer_monoloco.send((pil_image, dic_out))
cam.release()
cv2.destroyAllWindows()
class VisualizerMonoloco:
def __init__(self, kk, args, epistemic=False):
self.kk = kk
self.args = args
self.z_max = args.z_max
self.epistemic = epistemic
self.output_types = args.output_types
def __call__(self, first_image, fig_width=4.0, **kwargs):
if 'figsize' not in kwargs:
kwargs['figsize'] = (fig_width, fig_width * first_image.size[0] / first_image.size[1])
printer = Printer(first_image, output_path="", kk=self.kk, output_types=self.output_types,
z_max=self.z_max, epistemic=self.epistemic)
figures, axes = printer.factory_axes()
for fig in figures:
fig.show()
while True:
image, dict_ann = yield
draw_start = time.time()
# Remove the artists drawn for the previous frame before drawing the new one
while axes and ((axes[0] and axes[0].patches) or (axes[-1] and axes[-1].patches)):
if axes[0]:
del axes[0].patches[0]
del axes[0].texts[0]
if len(axes) == 2:
del axes[1].patches[0]
del axes[1].patches[0]  # after the first deletion, the next patch has shifted to index 0
if len(axes[1].lines) > 2:
del axes[1].lines[2]
del axes[1].texts[0]
printer.draw(figures, axes, dict_ann, image)
print('draw', time.time() - draw_start)
mypause(0.01)
def mypause(interval):
    """Run the GUI event loop for `interval` seconds without stealing window focus (unlike plt.pause)"""
manager = plt._pylab_helpers.Gcf.get_active()
if manager is not None:
canvas = manager.canvas
if canvas.figure.stale:
canvas.draw_idle()
canvas.start_event_loop(interval)
else:
time.sleep(interval)
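As a closing note, `VisualizerMonoloco.__call__` is a generator, so the webcam loop above drives it with the usual send-based handshake; a minimal sketch, using the names from this diff:

```python
# Generator handshake used by webcam() above.
visualizer = VisualizerMonoloco(kk, args)(pil_image)  # __call__ returns a generator
visualizer.send(None)                  # prime it: runs up to the first `yield`
visualizer.send((pil_image, dic_out))  # each further send redraws one frame
```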