Webcam (#5)
* add webcam skeleton
* fix bug
* adapt intrinsic matrix and assertion on negative boxes
* delete patches
* add support for bird view
* remove ground truth legend and remove unnecessary resizing
* add class pifpaf
* add keypoints scaling if not webcam
* add comments
* remove space
* add gif
* fix gif name
* update readme
* update readme
parent f0150da571
commit eae0ad5f7e
README.md (13 lines changed)
@@ -68,6 +68,19 @@ To check all the commands for the parser and the subparsers run:
 * `python3 src/main.py eval --help`
+
+# Webcam
+
+<img src="docs/webcam_short.gif" height=250 alt="example image" />
+
+MonoLoco can run on personal computers with no GPU, even on low-resolution images (e.g. 256x144).
+
+It supports three types of visualization: `front`, `bird` and `combined`; multiple visualizations can be shown in separate windows.
+
+The gif above was obtained by running the following command on a MacBook:
+
+`python src/main.py predict --webcam --scale 0.2 --output_types combined --z_max 10`
+
 # Predict
 The predict script receives an image (or an entire folder using glob expressions),
 calls PifPaf for 2d human pose detection over the image
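Any of the visualizations can also be requested on its own in the same way; for example, a bird-view-only run (a hedged variant of the command above, assuming `--output_types` takes the same values as in batch prediction):

`python src/main.py predict --webcam --scale 0.2 --output_types bird --z_max 10`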
BIN: binary file not shown (before: 694 KiB)
BIN docs/webcam_short.gif (new file): binary file not shown (after: 1.7 MiB)
src/main.py (10 lines changed)
@@ -15,6 +15,7 @@ from eval.generate_kitti import generate_kitti
 from eval.geom_baseline import geometric_baseline
 from models.hyp_tuning import HypTuning
 from eval.kitti_eval import KittiEval
+from visuals.webcam import webcam
 
 
 def cli():
@@ -57,7 +58,7 @@ def cli():
 
     # 2) Monoloco argument
     predict_parser.add_argument('--model', help='path of MonoLoco model to load',
-                                default="data/models/best_model__seed_2_.pickle")
+                                default="data/models/monoloco-190513-1437.pkl")
     predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=256)
     predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
                                 default='data/arrays/names-kitti-190513-1754.json')
@@ -67,7 +68,7 @@ def cli():
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=22)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
-    predict_parser.add_argument('--combined', help='to print combined images', action='store_true')
+    predict_parser.add_argument('--webcam', help='monoloco streaming', action='store_true')
 
     # Training
     training_parser.add_argument('--joints', help='Json file with input joints',
@@ -107,7 +108,10 @@ def main():
     args = cli()
 
     if args.command == 'predict':
-        _ = predict(args)
+        if args.webcam:
+            webcam(args)
+        else:
+            predict(args)
 
     elif args.command == 'prep':
         if 'nuscenes' in args.dataset:
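For context, the new dispatch in main() reduces to this standalone sketch (a hypothetical reduction; the real parser defines many more flags and subcommands):

import argparse

def main():
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    predict_parser = subparsers.add_parser('predict')
    predict_parser.add_argument('--webcam', help='monoloco streaming', action='store_true')
    args = parser.parse_args()

    if args.command == 'predict':
        if args.webcam:
            print('would call webcam(args)')   # streaming path
        else:
            print('would call predict(args)')  # batch path

if __name__ == '__main__':
    main()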
src/predict/pifpaf.py (new file, 110 lines)

@@ -0,0 +1,110 @@
import glob

import numpy as np
import torchvision
import torch
from PIL import Image, ImageFile

from openpifpaf.network import nets
from openpifpaf import decoder
from openpifpaf import transforms


class ImageList(torch.utils.data.Dataset):
    """It defines transformations to apply to images and outputs of the dataloader"""
    def __init__(self, image_paths, scale, image_transform=None):
        self.image_paths = image_paths
        self.image_transform = image_transform or transforms.image_transform  # to_tensor + normalize (from pifpaf)
        self.scale = scale
        # data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2.0))

    def __getitem__(self, index):
        image_path = self.image_paths[index]
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        with open(image_path, 'rb') as f:
            image = Image.open(f).convert('RGB')

        if self.scale > 1.01 or self.scale < 0.99:
            image = torchvision.transforms.functional.resize(image,
                                                             (round(self.scale * image.size[1]),
                                                              round(self.scale * image.size[0])),
                                                             interpolation=Image.BICUBIC)
        # PIL images are not iterables
        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
        image = self.image_transform(image)

        return image_path, original_image, image

    def __len__(self):
        return len(self.image_paths)


def factory_from_args(args):

    # Merge the model_pifpaf argument
    if not args.checkpoint:
        args.checkpoint = args.model_pifpaf
    # glob
    if not args.webcam:
        if args.glob:
            args.images += glob.glob(args.glob)
        if not args.images:
            raise Exception("no image files given")

    # add args.device
    args.device = torch.device('cpu')
    args.pin_memory = False
    if torch.cuda.is_available():
        args.device = torch.device('cuda')
        args.pin_memory = True

    # Add num_workers
    args.loader_workers = 8

    # Add visualization defaults
    args.figure_width = 10
    args.dpi_factor = 1.0

    return args


class PifPaf:
    def __init__(self, args):
        """Instantiate the model"""
        factory_from_args(args)
        model_pifpaf, _ = nets.factory_from_args(args)
        model_pifpaf = model_pifpaf.to(args.device)
        self.processor = decoder.factory_from_args(args, model_pifpaf)
        self.keypoints_whole = []

        # Scale the keypoints to the original image size for printing (if not webcam)
        if not args.webcam:
            self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
        else:
            self.scale_np = np.array([1, 1, 1] * 17).reshape(17, 3)

    def fields(self, processed_images):
        """Encoder for pif and paf fields"""
        fields_batch = self.processor.fields(processed_images)
        return fields_batch

    def forward(self, image, processed_image_cpu, fields):
        """Decoder, from pif and paf fields to keypoints"""
        self.processor.set_cpu_image(image, processed_image_cpu)
        keypoint_sets, scores = self.processor.keypoint_sets(fields)

        if keypoint_sets.size > 0:
            self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1)
                                        .reshape(keypoint_sets.shape[0], -1).tolist())

        pifpaf_out = [
            {'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
             'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0],
                      np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]}
            for kps in keypoint_sets
        ]
        return keypoint_sets, scores, pifpaf_out
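How the new class is consumed, assembled from the predict-script hunk below into one function (a sketch, not a standalone script; `args` is the parsed CLI namespace from src/main.py):

import torch
from predict.pifpaf import PifPaf, ImageList

def run_pifpaf(args):
    """Encode images into pif/paf fields, then decode them into keypoints."""
    pifpaf = PifPaf(args)                            # loads network + decoder onto args.device
    data = ImageList(args.images, scale=args.scale)
    loader = torch.utils.data.DataLoader(
        data, batch_size=1, shuffle=False,
        pin_memory=args.pin_memory, num_workers=args.loader_workers)

    for image_paths, image_tensors, processed_images_cpu in loader:
        images = image_tensors.permute(0, 2, 3, 1)   # NCHW -> NHWC for drawing
        processed_images = processed_images_cpu.to(args.device, non_blocking=True)
        fields_batch = pifpaf.fields(processed_images)        # encoder: pif/paf fields
        for image_path, image, processed_image_cpu, fields in zip(
                image_paths, images, processed_images_cpu, fields_batch):
            # decoder: fields -> keypoint sets, scores, and json-style dicts
            keypoint_sets, scores, pifpaf_out = pifpaf.forward(
                image, processed_image_cpu, fields)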
@@ -1,90 +1,21 @@
-import glob
 import os
+from PIL import Image
-import numpy as np
-import torchvision
 import torch
-from PIL import Image, ImageFile
 
-from openpifpaf.network import nets
+from predict.pifpaf import PifPaf, ImageList
-from openpifpaf import decoder
-from openpifpaf import transforms
 from predict.monoloco import MonoLoco
 from predict.factory import factory_for_gt, factory_outputs
 from utils.pifpaf import preprocess_pif
 
 
-class ImageList(torch.utils.data.Dataset):
-    """It defines transformations to apply to images and outputs of the dataloader"""
-    def __init__(self, image_paths, scale, image_transform=None):
-        self.image_paths = image_paths
-        self.image_transform = image_transform or transforms.image_transform  # to_tensor + normalize (from pifpaf)
-        self.scale = scale
-
-    # data = datasets.ImageList(args.images, preprocess=transforms.RescaleRelative(2.0))
-
-    def __getitem__(self, index):
-        image_path = self.image_paths[index]
-        ImageFile.LOAD_TRUNCATED_IMAGES = True
-        with open(image_path, 'rb') as f:
-            image = Image.open(f).convert('RGB')
-
-        if self.scale > 1.01 or self.scale < 0.99:
-            image = torchvision.transforms.functional.resize(image,
-                                                             (round(self.scale * image.size[1]),
-                                                              round(self.scale * image.size[0])),
-                                                             interpolation=Image.BICUBIC)
-        # PIL images are not iterables
-        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
-        image = self.image_transform(image)
-
-        return image_path, original_image, image
-
-    def __len__(self):
-        return len(self.image_paths)
-
-
-def factory_from_args(args):
-
-    # Merge the model_pifpaf argument
-    if not args.checkpoint:
-        args.checkpoint = args.model_pifpaf
-    # glob
-    if args.glob:
-        args.images += glob.glob(args.glob)
-    if not args.images:
-        raise Exception("no image files given")
-
-    # add args.device
-    args.device = torch.device('cpu')
-    args.pin_memory = False
-    if torch.cuda.is_available():
-        args.device = torch.device('cuda')
-        args.pin_memory = True
-
-    # Add num_workers
-    args.loader_workers = 8
-
-    # Add visualization defaults
-    args.figure_width = 10
-    args.dpi_factor = 1.0
-
-    return args
-
-
 def predict(args):
 
     cnt = 0
-    factory_from_args(args)
 
-    # load pifpaf model
+    # load pifpaf and monoloco models
-    model_pifpaf, _ = nets.factory_from_args(args)
+    pifpaf = PifPaf(args)
-    model_pifpaf = model_pifpaf.to(args.device)
-    processor = decoder.factory_from_args(args, model_pifpaf)
-
-    # load monoloco
     monoloco = MonoLoco(model_path=args.model, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
 
     # data
@@ -93,19 +24,15 @@ def predict(args):
         data, batch_size=1, shuffle=False,
         pin_memory=args.pin_memory, num_workers=args.loader_workers)
 
-    keypoints_whole = []
     for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
         images = image_tensors.permute(0, 2, 3, 1)
 
         processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-        fields_batch = processor.fields(processed_images)
+        fields_batch = pifpaf.fields(processed_images)
 
         # unbatch
         for image_path, image, processed_image_cpu, fields in zip(
-                image_paths,
-                images,
-                processed_images_cpu,
-                fields_batch):
+                image_paths, images, processed_images_cpu, fields_batch):
 
             if args.output_directory is None:
                 output_path = image_path
@@ -114,22 +41,7 @@ def predict(args):
                 output_path = os.path.join(args.output_directory, file_name)
             print('image', idx, image_path, output_path)
 
-            processor.set_cpu_image(image, processed_image_cpu)
-            keypoint_sets, scores = processor.keypoint_sets(fields)
+            keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
 
-            # Correct to not change the confidence
-            scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
-
-            if keypoint_sets.size > 0:
-                keypoints_whole.append(np.around((keypoint_sets / scale_np), 1)
-                                       .reshape(keypoint_sets.shape[0], -1).tolist())
-
-            pifpaf_out = [
-                {'keypoints': np.around(kps / scale_np, 1).reshape(-1).tolist(),
-                 'bbox': [np.min(kps[:, 0]) / args.scale, np.min(kps[:, 1]) / args.scale,
-                          np.max(kps[:, 0]) / args.scale, np.max(kps[:, 1]) / args.scale]}
-                for kps in keypoint_sets
-            ]
             pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
             images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor (resized) and monoloco original image
 
@@ -138,7 +50,6 @@ def predict(args):
                             float(image.size()[0] / args.scale))  # Width, Height (original)
 
             # Extract calibration matrix and ground truth file if present
-
             with open(image_path, 'rb') as f:
                 pil_image = Image.open(f).convert('RGB')
             images_outputs.append(pil_image)
@@ -159,4 +70,3 @@ def predict(args):
             factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
             print('Image {}\n'.format(cnt) + '-' * 120)
             cnt += 1
-    return keypoints_whole
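With `keypoints_whole` and its `return` gone from `predict`, the accumulation now happens inside `PifPaf.forward`, so a caller that previously consumed the return value would read the attribute instead (a sketch, not part of the diff):

all_keypoints = pifpaf.keypoints_whole  # one entry per image with detections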
@@ -24,7 +24,7 @@ def preprocess_pif(annotations, im_size=None):
         # Add 10% for y
         delta_h = (box[3] - box[1]) / 10
         delta_w = (box[2] - box[0]) / 10
-        assert delta_h > 0 and delta_w > 0, "Bounding box <=0"
+        assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
         box[0] -= delta_w
         box[1] -= delta_h
         box[2] += delta_w
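The relaxed assertion (`> -5` instead of `> 0`) presumably tolerates the slightly degenerate boxes that sparse keypoints can produce, instead of aborting the run. A minimal sketch of the padding step it guards, assuming `box` is `[x_min, y_min, x_max, y_max]` in pixels (hypothetical standalone helper):

def enlarge_box(box):
    # Pad by 10% of the height/width on each side, as in preprocess_pif above.
    delta_h = (box[3] - box[1]) / 10
    delta_w = (box[2] - box[0]) / 10
    assert delta_h > -5 and delta_w > -5, "Bounding box <=0"
    return [box[0] - delta_w, box[1] - delta_h, box[2] + delta_w, box[3] + delta_h]

print(enlarge_box([100.0, 50.0, 200.0, 250.0]))  # [90.0, 30.0, 210.0, 270.0]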
@@ -73,6 +73,7 @@ class Printer:
             "combined figure cannot be printed together with front or bird ones"
 
         self.y_scale = self.width / (self.height * 1.8)  # Defined proportion
+        if self.y_scale < 0.95 or self.y_scale > 1.05:  # allows more variation without resizing
             self.im = self.im.resize((self.width, round(self.height * self.y_scale)))
         self.width = self.im.size[0]
         self.height = self.im.size[1]
@@ -178,9 +179,10 @@ class Printer:
             ellipse_real = Ellipse((self.xx_gt[idx], self.zz_gt[idx]), width=target * 2, height=1,
                                    angle=angle, color='lightgreen', fill=True, label="Task error")
             axes[1].add_patch(ellipse_real)
+            if abs(self.zz_gt[idx] - self.zz_pred[idx]) > 0.001:
                 axes[1].plot(self.xx_gt[idx], self.zz_gt[idx], 'kx', label="Ground truth", markersize=3)
 
-        # Print prediction and the real ground truth. Color of prediction depends if ground truth exists
+        # Print prediction and the real ground truth.
         num = 0
         for idx, _ in enumerate(self.xx_pred):
             if self.zz_gt[idx] > 0:  # only the merging ones and inside the interval
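For intuition on the new guard: a 1280x720 frame gives y_scale = 1280 / (720 * 1.8), about 0.988, which falls inside the [0.95, 1.05] band, so the image is no longer resized; only images far from the target 1.8:1 proportion get rescaled. A quick check (illustrative numbers only):

width, height = 1280, 720
y_scale = width / (height * 1.8)
print(round(y_scale, 3))                   # 0.988
print(y_scale < 0.95 or y_scale > 1.05)    # False -> no resize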
src/visuals/webcam.py (new file, 122 lines)

@@ -0,0 +1,122 @@
"""
Webcam demo application

Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/webcam.py
"""

import time

import torch
import matplotlib.pyplot as plt
from PIL import Image
from openpifpaf import transforms

import cv2

from visuals.printer import Printer
from utils.pifpaf import preprocess_pif
from predict.pifpaf import PifPaf
from predict.monoloco import MonoLoco
from predict.factory import factory_for_gt


def webcam(args):

    # add args.device
    args.device = torch.device('cpu')
    if torch.cuda.is_available():
        args.device = torch.device('cuda')

    # load models
    args.camera = True
    pifpaf = PifPaf(args)
    monoloco = MonoLoco(model_path=args.model, device=args.device)

    # Start recording
    cam = cv2.VideoCapture(0)
    visualizer_monoloco = None

    while True:
        ret, frame = cam.read()
        if not ret:  # check the capture before using the frame
            break
        image = cv2.resize(frame, None, fx=args.scale, fy=args.scale)
        height, width, _ = image.shape
        print('resized image size: {}'.format(image.shape))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        processed_image_cpu = transforms.image_transform(image.copy())
        processed_image = processed_image_cpu.contiguous().to(args.device, non_blocking=True)
        fields = pifpaf.fields(torch.unsqueeze(processed_image, 0))[0]
        _, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)

        key = cv2.waitKey(1)
        if key % 256 == 27:
            # ESC pressed
            print("Escape hit, closing...")
            break

        pil_image = Image.fromarray(image)
        intrinsic_size = [xx * 1.3 for xx in pil_image.size]
        kk, dict_gt = factory_for_gt(intrinsic_size)  # better intrinsics for mac camera
        if visualizer_monoloco is None:
            visualizer_monoloco = VisualizerMonoloco(kk, args)(pil_image)
            visualizer_monoloco.send(None)

        if pifpaf_out:
            boxes, keypoints = preprocess_pif(pifpaf_out, (width, height))
            outputs, varss = monoloco.forward(keypoints, kk)
            dic_out = monoloco.post_process(outputs, varss, boxes, keypoints, kk, dict_gt)
            visualizer_monoloco.send((pil_image, dic_out))

    cam.release()
    cv2.destroyAllWindows()


class VisualizerMonoloco:
    def __init__(self, kk, args, epistemic=False):
        self.kk = kk
        self.args = args
        self.z_max = args.z_max
        self.epistemic = epistemic
        self.output_types = args.output_types

    def __call__(self, first_image, fig_width=4.0, **kwargs):
        if 'figsize' not in kwargs:
            kwargs['figsize'] = (fig_width, fig_width * first_image.size[0] / first_image.size[1])

        printer = Printer(first_image, output_path="", kk=self.kk, output_types=self.output_types,
                          z_max=self.z_max, epistemic=self.epistemic)
        figures, axes = printer.factory_axes()

        for fig in figures:
            fig.show()

        while True:
            image, dict_ann = yield
            draw_start = time.time()
            # clear the patches/texts/lines drawn for the previous frame
            while axes and ((axes[0] and axes[0].patches) or (axes[-1] and axes[-1].patches)):
                if axes[0]:
                    del axes[0].patches[0]
                    del axes[0].texts[0]
                if len(axes) == 2:
                    del axes[1].patches[0]
                    del axes[1].patches[0]  # the one that became index 0
                    if len(axes[1].lines) > 2:
                        del axes[1].lines[2]
                        del axes[1].texts[0]
            printer.draw(figures, axes, dict_ann, image)
            print('draw', time.time() - draw_start)
            mypause(0.01)


def mypause(interval):
    manager = plt._pylab_helpers.Gcf.get_active()
    if manager is not None:
        canvas = manager.canvas
        if canvas.figure.stale:
            canvas.draw_idle()
        canvas.start_event_loop(interval)
    else:
        time.sleep(interval)
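`VisualizerMonoloco.__call__` contains a `yield`, so calling it returns a generator: `webcam()` primes it with `send(None)`, which runs the figure setup up to the first `yield`, and then feeds it one `(image, dic_out)` tuple per frame. A self-contained sketch of this coroutine pattern (hypothetical names, no matplotlib):

class Visualizer:
    def __call__(self, first_frame):
        print('one-time setup for', first_frame)   # figures, axes, windows
        while True:
            frame, annotations = yield             # wait for the next frame
            print('draw', annotations, 'on', frame)

viz = Visualizer()('frame0')   # __call__ has a yield, so this returns a generator
viz.send(None)                 # prime: runs setup, pauses at the first yield
viz.send(('frame1', {'z': 3.1}))
viz.send(('frame2', {'z': 2.9}))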