Webcam (#5)
* add webcam skeleton * fix bug * adapt intrinsic matrix and assertion on negative boxes * delete patches * add support for bird view * remove ground truth legend and remove unnecessary resizing * add class pifpaf * add keypoints scaling if not webcam * add comments * remove space * add gif * fix gif name * update readme * update readme
This commit is contained in:
parent
f0150da571
commit
eae0ad5f7e
13
README.md
13
README.md
@ -67,6 +67,19 @@ To check all the commands for the parser and the subparsers run:
|
||||
* `python3 src/main.py train --help`
|
||||
* `python3 src/main.py eval --help`
|
||||
|
||||
|
||||
# Webcam
|
||||
<img src="docs/webcam_short.gif" height=250 alt="example image" />
|
||||
|
||||
MonoLoco can run on personal computers with no GPU and low resolution images (e.g. 256x144).
|
||||
|
||||
It supports 3 types of visualization: `front`, `bird` and `combined`.
|
||||
Multiple visualizations can be combined in different windows.
|
||||
|
||||
The above gif was obtained by running the following command on a MacBook:
|
||||
|
||||
`python src/main.py predict --webcam --scale 0.2 --output_types combined --z_max 10`
|
||||
|
||||
|
||||
# Predict
|
||||
The predict script receives an image (or an entire folder using glob expressions),
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 694 KiB |
BIN
docs/webcam_short.gif
Normal file
BIN
docs/webcam_short.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.7 MiB |
10
src/main.py
10
src/main.py
@ -15,6 +15,7 @@ from eval.generate_kitti import generate_kitti
|
||||
from eval.geom_baseline import geometric_baseline
|
||||
from models.hyp_tuning import HypTuning
|
||||
from eval.kitti_eval import KittiEval
|
||||
from visuals.webcam import webcam
|
||||
|
||||
|
||||
def cli():
|
||||
@ -57,7 +58,7 @@ def cli():
|
||||
|
||||
# 2) Monoloco argument
|
||||
predict_parser.add_argument('--model', help='path of MonoLoco model to load',
|
||||
default="data/models/best_model__seed_2_.pickle")
|
||||
default="data/models/monoloco-190513-1437.pkl")
|
||||
predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256)
|
||||
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
|
||||
default='data/arrays/names-kitti-190513-1754.json')
|
||||
@ -67,7 +68,7 @@ def cli():
|
||||
predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=22)
|
||||
predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
|
||||
predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
|
||||
predict_parser.add_argument('--combined', help='to print combined images', action='store_true')
|
||||
predict_parser.add_argument('--webcam', help='monoloco streaming', action='store_true')
|
||||
|
||||
# Training
|
||||
training_parser.add_argument('--joints', help='Json file with input joints',
|
||||
@ -107,7 +108,10 @@ def main():
|
||||
args = cli()
|
||||
|
||||
if args.command == 'predict':
|
||||
_ = predict(args)
|
||||
if args.webcam:
|
||||
webcam(args)
|
||||
else:
|
||||
predict(args)
|
||||
|
||||
elif args.command == 'prep':
|
||||
if 'nuscenes' in args.dataset:
|
||||
|
||||
110
src/predict/pifpaf.py
Normal file
110
src/predict/pifpaf.py
Normal file
@ -0,0 +1,110 @@
|
||||
|
||||
import glob
|
||||
|
||||
import numpy as np
|
||||
import torchvision
|
||||
import torch
|
||||
from PIL import Image, ImageFile
|
||||
|
||||
from openpifpaf.network import nets
|
||||
from openpifpaf import decoder
|
||||
from openpifpaf import transforms
|
||||
|
||||
|
||||
class ImageList(torch.utils.data.Dataset):
    """Dataset over a list of image paths, optionally rescaling each image.

    Every item is a triple ``(image_path, original_image, image)`` where
    ``original_image`` is the (possibly resized) picture converted with
    torchvision's ``to_tensor`` and ``image`` is the same picture passed
    through ``image_transform``.
    """

    def __init__(self, image_paths, scale, image_transform=None):
        """Store the paths, the rescaling factor and the transform to apply.

        Args:
            image_paths: indexable collection of image file paths.
            scale: multiplicative factor applied to both image dimensions.
            image_transform: callable applied to the PIL image; any falsy
                value falls back to ``transforms.image_transform``
                (to_tensor + normalize, from pifpaf).
        """
        self.image_paths = image_paths
        self.scale = scale
        self.image_transform = image_transform or transforms.image_transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load, optionally resize and transform the image at ``index``."""
        path = self.image_paths[index]

        # Process-wide PIL switch: tolerate truncated image files when loading
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        with open(path, 'rb') as handle:
            picture = Image.open(handle).convert('RGB')

        # Scales within 1% of identity are treated as "no resize"
        if self.scale < 0.99 or self.scale > 1.01:
            width, height = picture.size  # PIL reports (width, height)
            target_hw = (round(self.scale * height), round(self.scale * width))
            picture = torchvision.transforms.functional.resize(
                picture, target_hw, interpolation=Image.BICUBIC)

        # PIL images are not iterables, hence the explicit tensor conversion (0-255 --> 0-1)
        raw_tensor = torchvision.transforms.functional.to_tensor(picture)
        network_input = self.image_transform(picture)

        return path, raw_tensor, network_input
|
||||
|
||||
|
||||
def factory_from_args(args):
    """Complete the parsed arguments with the values the pifpaf pipeline needs.

    The namespace is modified in place and also returned.

    Args:
        args: argument namespace. Reads ``checkpoint``, ``model_pifpaf`` and
            ``webcam``, plus ``glob`` and ``images`` when not in webcam mode.

    Returns:
        The same ``args`` object, with ``checkpoint``, ``device``,
        ``pin_memory``, ``loader_workers``, ``figure_width`` and
        ``dpi_factor`` set (and ``images`` extended by the glob matches).

    Raises:
        ValueError: when not in webcam mode and no image file is available
            after expanding the glob expression.
    """
    # Merge the model_pifpaf argument
    if not args.checkpoint:
        args.checkpoint = args.model_pifpaf

    # Expand the glob expression; a webcam stream has no image files to collect
    if not args.webcam:
        if args.glob:
            if args.images is None:
                # Avoid "NoneType += list" when no explicit image was passed
                args.images = []
            args.images += glob.glob(args.glob)
        if not args.images:
            # ValueError is still caught by callers handling a generic Exception
            raise ValueError("no image files given")

    # add args.device (memory pinning is only enabled together with CUDA)
    use_cuda = torch.cuda.is_available()
    args.device = torch.device('cuda' if use_cuda else 'cpu')
    args.pin_memory = use_cuda

    # Add num_workers
    args.loader_workers = 8

    # Add visualization defaults
    args.figure_width = 10
    args.dpi_factor = 1.0

    return args
|
||||
|
||||
|
||||
class PifPaf:
    """Wrapper around the openpifpaf network and decoder used for prediction.

    Attributes set by ``__init__``:
        processor: decoder built by ``decoder.factory_from_args``.
        keypoints_whole: list that receives one entry per ``forward`` call
            that detected at least one keypoint set.
        scale_np: (17, 3) array of per-keypoint divisors ``[x, y, confidence]``;
            the confidence column is always 1 so confidences are not rescaled.
    """

    def __init__(self, args):
        """Instantiate the model"""
        factory_from_args(args)
        model_pifpaf, _ = nets.factory_from_args(args)
        model_pifpaf = model_pifpaf.to(args.device)
        self.processor = decoder.factory_from_args(args, model_pifpaf)
        # NOTE(review): this list is appended to on every forward() call and never
        # cleared here, so it grows for the whole duration of a webcam stream.
        self.keypoints_whole = []

        # Scale the keypoints to the original image size for printing (if not webcam)
        factor = 1 if args.webcam else args.scale
        self.scale_np = np.array([factor, factor, 1] * 17).reshape(17, 3)

    def fields(self, processed_images):
        """Encoder for pif and paf fields"""
        fields_batch = self.processor.fields(processed_images)
        return fields_batch

    def forward(self, image, processed_image_cpu, fields):
        """Decoder, from pif and paf fields to keypoints

        Returns:
            ``(keypoint_sets, scores, pifpaf_out)``: the first two come straight
            from the decoder; ``pifpaf_out`` is a list with one dict per keypoint
            set, holding the flattened ``'keypoints'`` (rounded to one decimal)
            and the ``'bbox'`` ``[x_min, y_min, x_max, y_max]``, both divided by
            ``scale_np``.
        """
        self.processor.set_cpu_image(image, processed_image_cpu)
        keypoint_sets, scores = self.processor.keypoint_sets(fields)

        if keypoint_sets.size > 0:
            self.keypoints_whole.append(
                np.around(keypoint_sets / self.scale_np, 1)
                .reshape(keypoint_sets.shape[0], -1).tolist())

        # Each axis uses its own divisor (y was previously divided by the x scale)
        x_scale, y_scale = self.scale_np[0, 0], self.scale_np[0, 1]
        pifpaf_out = [
            {'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
             'bbox': [np.min(kps[:, 0]) / x_scale, np.min(kps[:, 1]) / y_scale,
                      np.max(kps[:, 0]) / x_scale, np.max(kps[:, 1]) / y_scale]}
            for kps in keypoint_sets
        ]
        return keypoint_sets, scores, pifpaf_out
|
||||
|
||||
@ -1,90 +1,21 @@
|
||||
|
||||
import glob
|
||||
import os
|
||||
from PIL import Image
|
||||
|
||||
import numpy as np
|
||||
import torchvision
|
||||
import torch
|
||||
from PIL import Image, ImageFile
|
||||
|
||||
from openpifpaf.network import nets
|
||||
from openpifpaf import decoder
|
||||
from openpifpaf import transforms
|
||||
from predict.pifpaf import PifPaf, ImageList
|
||||
from predict.monoloco import MonoLoco
|
||||
from predict.factory import factory_for_gt, factory_outputs
|
||||
from utils.pifpaf import preprocess_pif
|
||||
|
||||
|
||||
class ImageList(torch.utils.data.Dataset):
|
||||
"""It defines transformations to apply to images and outputs of the dataloader"""
|
||||
def __init__(self, image_paths, scale, image_transform=None):
|
||||
self.image_paths = image_paths
|
||||
self.image_transform = image_transform or transforms.image_transform # to_tensor + normalize (from pifpaf)
|
||||
self.scale = scale
|
||||
|
||||
# data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2
|
||||
# .0)
|
||||
|
||||
def __getitem__(self, index):
|
||||
image_path = self.image_paths[index]
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
with open(image_path, 'rb') as f:
|
||||
image = Image.open(f).convert('RGB')
|
||||
|
||||
if self.scale > 1.01 or self.scale < 0.99:
|
||||
image = torchvision.transforms.functional.resize(image,
|
||||
(round(self.scale * image.size[1]),
|
||||
round(self.scale * image.size[0])),
|
||||
interpolation=Image.BICUBIC)
|
||||
# PIL images are not iterables
|
||||
original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1
|
||||
image = self.image_transform(image)
|
||||
|
||||
return image_path, original_image, image
|
||||
|
||||
def __len__(self):
|
||||
return len(self.image_paths)
|
||||
|
||||
|
||||
def factory_from_args(args):
|
||||
|
||||
# Merge the model_pifpaf argument
|
||||
if not args.checkpoint:
|
||||
args.checkpoint = args.model_pifpaf
|
||||
# glob
|
||||
if args.glob:
|
||||
args.images += glob.glob(args.glob)
|
||||
if not args.images:
|
||||
raise Exception("no image files given")
|
||||
|
||||
# add args.device
|
||||
args.device = torch.device('cpu')
|
||||
args.pin_memory = False
|
||||
if torch.cuda.is_available():
|
||||
args.device = torch.device('cuda')
|
||||
args.pin_memory = True
|
||||
|
||||
# Add num_workers
|
||||
args.loader_workers = 8
|
||||
|
||||
# Add visualization defaults
|
||||
args.figure_width = 10
|
||||
args.dpi_factor = 1.0
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def predict(args):
|
||||
|
||||
cnt = 0
|
||||
factory_from_args(args)
|
||||
|
||||
# load pifpaf model
|
||||
model_pifpaf, _ = nets.factory_from_args(args)
|
||||
model_pifpaf = model_pifpaf.to(args.device)
|
||||
processor = decoder.factory_from_args(args, model_pifpaf)
|
||||
|
||||
# load monoloco
|
||||
# load pifpaf and monoloco models
|
||||
pifpaf = PifPaf(args)
|
||||
monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||
|
||||
# data
|
||||
@ -93,19 +24,15 @@ def predict(args):
|
||||
data, batch_size=1, shuffle=False,
|
||||
pin_memory=args.pin_memory, num_workers=args.loader_workers)
|
||||
|
||||
keypoints_whole = []
|
||||
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
|
||||
images = image_tensors.permute(0, 2, 3, 1)
|
||||
|
||||
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
|
||||
fields_batch = processor.fields(processed_images)
|
||||
fields_batch = pifpaf.fields(processed_images)
|
||||
|
||||
# unbatch
|
||||
for image_path, image, processed_image_cpu, fields in zip(
|
||||
image_paths,
|
||||
images,
|
||||
processed_images_cpu,
|
||||
fields_batch):
|
||||
image_paths, images, processed_images_cpu, fields_batch):
|
||||
|
||||
if args.output_directory is None:
|
||||
output_path = image_path
|
||||
@ -114,22 +41,7 @@ def predict(args):
|
||||
output_path = os.path.join(args.output_directory, file_name)
|
||||
print('image', idx, image_path, output_path)
|
||||
|
||||
processor.set_cpu_image(image, processed_image_cpu)
|
||||
keypoint_sets, scores = processor.keypoint_sets(fields)
|
||||
|
||||
# Correct to not change the confidence
|
||||
scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
|
||||
|
||||
if keypoint_sets.size > 0:
|
||||
keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
|
||||
.reshape(keypoint_sets.shape[0], -1).tolist())
|
||||
|
||||
pifpaf_out = [
|
||||
{'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
|
||||
'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
|
||||
np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
|
||||
for kps in keypoint_sets
|
||||
]
|
||||
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
|
||||
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
|
||||
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
|
||||
|
||||
@ -138,7 +50,6 @@ def predict(args):
|
||||
float(image.size()[0] / args.scale)) # Width, Height (original)
|
||||
|
||||
# Extract calibration matrix and ground truth file if present
|
||||
|
||||
with open(image_path, 'rb') as f:
|
||||
pil_image = Image.open(f).convert('RGB')
|
||||
images_outputs.append(pil_image)
|
||||
@ -159,4 +70,3 @@ def predict(args):
|
||||
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
|
||||
print('Image {}\n'.format(cnt) + '-' * 120)
|
||||
cnt += 1
|
||||
return keypoints_whole
|
||||
|
||||
@ -24,7 +24,7 @@ def preprocess_pif(annotations, im_size=None):
|
||||
# Add 10% for y
|
||||
delta_h = (box[3] - box[1]) / 10
|
||||
delta_w = (box[2] - box[0]) / 10
|
||||
assert delta_h > 0 and delta_w > 0, "Bounding box <=0"
|
||||
assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
|
||||
box[0] -= delta_w
|
||||
box[1] -= delta_h
|
||||
box[2] += delta_w
|
||||
|
||||
@ -73,7 +73,8 @@ class Printer:
|
||||
"combined figure cannot be print together with front or bird ones"
|
||||
|
||||
self.y_scale = self.width / (self.height * 1.8) # Defined proportion
|
||||
self.im = self.im.resize((self.width, round(self.height * self.y_scale)))
|
||||
if self.y_scale < 0.95 or self.y_scale > 1.05: # allows more variation without resizing
|
||||
self.im = self.im.resize((self.width, round(self.height * self.y_scale)))
|
||||
self.width = self.im.size[0]
|
||||
self.height = self.im.size[1]
|
||||
fig_width = self.fig_width + 0.6 * self.fig_width
|
||||
@ -178,9 +179,10 @@ class Printer:
|
||||
ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1,
|
||||
angle=angle, color='lightgreen', fill=True, label="Task error")
|
||||
axes[1].add_patch(ellipse_real)
|
||||
axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3)
|
||||
if abs(self.zz_gt[idx] - self.zz_pred[idx]) > 0.001:
|
||||
axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3)
|
||||
|
||||
# Print prediction and the real ground truth. Color of prediction depends if ground truth exists
|
||||
# Print prediction and the real ground truth.
|
||||
num = 0
|
||||
for idx, _ in enumerate(self.xx_pred):
|
||||
if self.zz_gt[idx] > 0: # only the merging ones and inside the interval
|
||||
|
||||
122
src/visuals/webcam.py
Normal file
122
src/visuals/webcam.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""
|
||||
Webcam demo application
|
||||
|
||||
Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/webcam.py
|
||||
|
||||
"""
|
||||
|
||||
import time
|
||||
|
||||
import torch
|
||||
import matplotlib.pyplot as plt
|
||||
from PIL import Image
|
||||
from openpifpaf import transforms
|
||||
|
||||
import cv2
|
||||
|
||||
from visuals.printer import Printer
|
||||
from utils.pifpaf import preprocess_pif
|
||||
from predict.pifpaf import PifPaf
|
||||
from predict.monoloco import MonoLoco
|
||||
from predict.factory import factory_for_gt
|
||||
|
||||
|
||||
def webcam(args):
    """Run pifpaf + MonoLoco on the frames of camera 0 and plot the results live.

    Frames are resized by ``args.scale``, converted from BGR to RGB, decoded
    into keypoints by pifpaf and, when at least one detection is present, fed
    to MonoLoco and sent to the visualizer. The loop ends when a frame cannot
    be read or when ESC is pressed; the camera and the OpenCV windows are
    released in every case, including when an exception interrupts the loop.

    Args:
        args: parsed predict arguments. ``scale`` and ``model`` are read here;
            ``device`` and ``camera`` are set on it.
    """
    # add args.device
    args.device = torch.device('cpu')
    if torch.cuda.is_available():
        args.device = torch.device('cuda')

    # load models
    args.camera = True
    pifpaf = PifPaf(args)
    monoloco = MonoLoco(model_path=args.model, device=args.device)

    # Start recording
    cam = cv2.VideoCapture(0)
    visualizer_monoloco = None

    try:
        while True:
            ret, frame = cam.read()
            if not ret:
                # No frame was grabbed: stop before touching the (invalid) frame
                break

            image = cv2.resize(frame, None, fx=args.scale, fy=args.scale)
            height, width, _ = image.shape
            print('resized image size: {}'.format(image.shape))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            processed_image_cpu = transforms.image_transform(image.copy())
            processed_image = processed_image_cpu.contiguous().to(args.device, non_blocking=True)
            # The network works on batches: add a batch dimension and take the single result
            fields = pifpaf.fields(torch.unsqueeze(processed_image, 0))[0]
            _, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)

            key = cv2.waitKey(1)
            if key % 256 == 27:
                # ESC pressed
                print("Escape hit, closing...")
                break

            pil_image = Image.fromarray(image)
            intrinsic_size = [xx * 1.3 for xx in pil_image.size]
            kk, dict_gt = factory_for_gt(intrinsic_size)  # better intrinsics for mac camera

            if visualizer_monoloco is None:
                visualizer_monoloco = VisualizerMonoloco(kk, args)(pil_image)
                # Prime the generator so that it is ready to receive frames
                visualizer_monoloco.send(None)

            if pifpaf_out:
                boxes, keypoints = preprocess_pif(pifpaf_out, (width, height))
                outputs, varss = monoloco.forward(keypoints, kk)
                dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt)
                visualizer_monoloco.send((pil_image, dic_out))
    finally:
        cam.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
class VisualizerMonoloco:
    """Generator-based live plotter for MonoLoco webcam predictions.

    Calling an instance with the first frame returns a generator. After it is
    primed with ``send(None)``, every ``send((image, dict_ann))`` removes the
    previously drawn artists and draws the new frame through ``Printer``.
    """

    def __init__(self, kk, args, epistemic=False):
        """Keep the calibration matrix, the arguments and the plotting options."""
        self.args = args
        self.kk = kk
        self.epistemic = epistemic
        self.z_max = args.z_max
        self.output_types = args.output_types

    @staticmethod
    def _clear_axes(axes):
        """Delete the artists left on the axes by the previous frame.

        NOTE(review): the deletions assume a fixed number of artists per loop
        iteration; an axis with patches but no text, or with fewer patches on
        the second axis, would raise IndexError. Confirm against Printer.draw.
        """
        while axes and ((axes[0] and axes[0].patches) or (axes[-1] and axes[-1].patches)):
            if axes[0]:
                first = axes[0]
                del first.patches[0]
                del first.texts[0]
            if len(axes) == 2:
                second = axes[1]
                del second.patches[0]
                del second.patches[0]  # the one became the 0
                if len(second.lines) > 2:
                    del second.lines[2]
                    del second.texts[0]

    def __call__(self, first_image, fig_width=4.0, **kwargs):
        """Create the figures and then redraw them for every frame sent in."""
        if 'figsize' not in kwargs:
            img_w, img_h = first_image.size[0], first_image.size[1]
            kwargs['figsize'] = (fig_width, fig_width * img_w / img_h)

        plotter = Printer(first_image, output_path="", kk=self.kk, output_types=self.output_types,
                          z_max=self.z_max, epistemic=self.epistemic)
        figures, axes = plotter.factory_axes()

        for figure in figures:
            figure.show()

        while True:
            image, dict_ann = yield
            tic = time.time()
            self._clear_axes(axes)
            plotter.draw(figures, axes, dict_ann, image)
            print('draw', time.time() - tic)
            mypause(0.01)
|
||||
|
||||
|
||||
def mypause(interval):
    """Run the active figure's event loop for ``interval`` seconds.

    If the active figure is stale, a redraw is requested before the event
    loop starts. When there is no active figure manager the function just
    sleeps for ``interval`` seconds.
    """
    active = plt._pylab_helpers.Gcf.get_active()
    if active is None:
        time.sleep(interval)
        return

    canvas = active.canvas
    if canvas.figure.stale:
        canvas.draw_idle()
    canvas.start_event_loop(interval)
|
||||
Loading…
Reference in New Issue
Block a user