From ea63dd5781c31479956437b205b84f7c6f1986c9 Mon Sep 17 00:00:00 2001 From: charlesbvll Date: Tue, 23 Mar 2021 13:12:03 +0100 Subject: [PATCH] Merged old monstereo --- monoloco/activity.py | 110 ++++++------------ monoloco/network/net.py | 8 +- monoloco/predict.py | 6 +- monoloco/run.py | 29 +++-- monoloco/visuals/pifpaf_show.py | 104 +++++++++++++++-- monoloco/visuals/printer.py | 72 ++++++++++-- monoloco/visuals/webcam.py | 196 ++++++++++++++++++++++++++++++++ 7 files changed, 421 insertions(+), 104 deletions(-) create mode 100644 monoloco/visuals/webcam.py diff --git a/monoloco/activity.py b/monoloco/activity.py index 59d4f09..5b6e81f 100644 --- a/monoloco/activity.py +++ b/monoloco/activity.py @@ -8,10 +8,9 @@ from contextlib import contextmanager import numpy as np import torch import matplotlib.pyplot as plt -from matplotlib.patches import Circle, FancyArrow from .network.process import laplace_sampling -from .visuals.pifpaf_show import KeypointPainter, image_canvas +from .visuals.pifpaf_show import KeypointPainter, image_canvas, get_pifpaf_outputs, draw_orientation, social_distance_colors def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False, @@ -65,6 +64,31 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa return False +def is_raising_hand(keypoint): + """ + Returns flag of alert if someone raises their hand + """ + l_shoulder = 5 + l_hand = 9 + r_shoulder = 6 + r_hand = 10 + h_offset = 10 + + if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and + keypoint[1][r_hand] < keypoint[1][r_shoulder]) and + (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and + keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])): + return 'both' + + if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]): + return 'left' + + if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]: + return 'right' + + return 'none' + + def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False): """ Check F-formations for people close together (this function do not expect far away people): @@ -109,32 +133,37 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False return False -def show_social(args, image_t, output_path, annotations, dic_out): +def show_activities(args, image_t, output_path, annotations, dic_out): """Output frontal image with poses or combined with bird eye view""" assert 'front' in args.output_types or 'bird' in args.output_types, "outputs allowed: front and/or bird" + colors = ['deepskyblue' for _ in dic_out['uv_heads']] + if 'social_distance' in args.activities: + colors = social_distance_colors(colors, dic_out) + angles = dic_out['angles'] stds = dic_out['stds_ale'] xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']] - # Prepare color for social distancing - colors = ['r' if flag else 'deepskyblue' for flag in dic_out['social_distance']] - # Draw keypoints and orientation if 'front' in args.output_types: - keypoint_sets, scores = get_pifpaf_outputs(annotations) + keypoint_sets, _ = get_pifpaf_outputs(annotations) uv_centers = dic_out['uv_heads'] sizes = [abs(dic_out['uv_heads'][idx][1] - uv_s[1]) / 1.5 for idx, uv_s in enumerate(dic_out['uv_shoulders'])] keypoint_painter = KeypointPainter(show_box=False) + r_h = 'none' + if 'raise_hand' in args.activities: + r_h = dic_out['raising_hand'] + with image_canvas(image_t, output_path + '.front.png', show=args.show, fig_width=10, dpi_factor=1.0) as ax: - keypoint_painter.keypoints(ax, keypoint_sets, colors=colors) + keypoint_painter.keypoints(ax, keypoint_sets, colors=colors, raise_hand=r_h) draw_orientation(ax, uv_centers, sizes, angles, colors, mode='front') if 'bird' in args.output_types: @@ -144,21 +173,6 @@ def show_social(args, image_t, output_path, annotations, dic_out): draw_uncertainty(ax1, xz_centers, stds) -def get_pifpaf_outputs(annotations): - # TODO extract direct from predictions with pifpaf 0.11+ - """Extract keypoints sets and scores from output dictionary""" - if not annotations: - return [], [] - keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape((-1, 17, 3)) - score_weights = np.ones((keypoints_sets.shape[0], 17)) - score_weights[:, 3] = 3.0 - score_weights /= np.sum(score_weights[0, :]) - kps_scores = keypoints_sets[:, :, 2] - ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1] - scores = np.sum(score_weights * ordered_kps_scores, axis=1) - return keypoints_sets, scores - - @contextmanager def bird_canvas(output_path, z_max): fig, ax = plt.subplots(1, 1) @@ -174,56 +188,6 @@ def bird_canvas(output_path, z_max): print('Bird-eye-view image saved') -def draw_orientation(ax, centers, sizes, angles, colors, mode): - - if mode == 'front': - length = 5 - fill = False - alpha = 0.6 - zorder_circle = 0.5 - zorder_arrow = 5 - linewidth = 1.5 - edgecolor = 'k' - radiuses = [s / 1.2 for s in sizes] - else: - length = 1.3 - head_width = 0.3 - linewidth = 2 - radiuses = [0.2] * len(centers) - # length = 1.6 - # head_width = 0.4 - # linewidth = 2.7 - radiuses = [0.2] * len(centers) - fill = True - alpha = 1 - zorder_circle = 2 - zorder_arrow = 1 - - for idx, theta in enumerate(angles): - color = colors[idx] - radius = radiuses[idx] - - if mode == 'front': - x_arr = centers[idx][0] + (length + radius) * math.cos(theta) - z_arr = length + centers[idx][1] + (length + radius) * math.sin(theta) - delta_x = math.cos(theta) - delta_z = math.sin(theta) - head_width = max(10, radiuses[idx] / 1.5) - - else: - edgecolor = color - x_arr = centers[idx][0] - z_arr = centers[idx][1] - delta_x = length * math.cos(theta) - delta_z = - length * math.sin(theta) # keep into account kitti convention - - circle = Circle(centers[idx], radius=radius, color=color, fill=fill, alpha=alpha, zorder=zorder_circle) - arrow = FancyArrow(x_arr, z_arr, delta_x, delta_z, head_width=head_width, edgecolor=edgecolor, - facecolor=color, linewidth=linewidth, zorder=zorder_arrow) - ax.add_patch(circle) - ax.add_patch(arrow) - - def draw_uncertainty(ax, centers, stds): for idx, std in enumerate(stds): std = stds[idx] diff --git a/monoloco/network/net.py b/monoloco/network/net.py index ffb4c58..90b9920 100644 --- a/monoloco/network/net.py +++ b/monoloco/network/net.py @@ -14,7 +14,7 @@ import torch from ..utils import get_iou_matches, reorder_matches, get_keypoints, pixel_to_camera, xyz_from_distance from .process import preprocess_monstereo, preprocess_monoloco, extract_outputs, extract_outputs_mono,\ filter_outputs, cluster_outputs, unnormalize_bi -from ..activity import social_interactions +from ..activity import social_interactions, is_raising_hand from .architectures import MonolocoModel, MonStereoModel @@ -265,6 +265,12 @@ class Loco: return dic_out + @staticmethod + def raising_hand(dic_out, keypoints): + dic_out['raising_hand'] = [is_raising_hand(keypoint) for keypoint in keypoints] + return dic_out + + def median_disparity(dic_out, keypoints, keypoints_r, mask): """ Ablation study: whenever a matching is found, compute depth by median disparity instead of using MonSter diff --git a/monoloco/predict.py b/monoloco/predict.py index f2ac121..6f940e7 100644 --- a/monoloco/predict.py +++ b/monoloco/predict.py @@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger from .visuals.printer import Printer from .network import Loco from .network.process import factory_for_gt, preprocess_pifpaf -from .activity import show_social +from .activity import show_activities, show_social LOG = logging.getLogger(__name__) @@ -239,8 +239,8 @@ def factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=None): elif any((xx in args.output_types for xx in ['front', 'bird', 'multi'])): LOG.info(output_path) - if args.social_distance: - show_social(args, pifpaf_outs['image'], output_path, pifpaf_outs['left'], dic_out) + if args.activities: + show_activities(args, pifpaf_outs['image'], output_path, pifpaf_outs['left'], dic_out) else: printer = Printer(pifpaf_outs['image'], output_path, kk, args) figures, axes = printer.factory_axes(dic_out) diff --git a/monoloco/run.py b/monoloco/run.py index 226daae..6c8210c 100644 --- a/monoloco/run.py +++ b/monoloco/run.py @@ -23,7 +23,7 @@ def cli(): help='what to output: json keypoints skeleton for Pifpaf' 'json bird front or multi for MonStereo') predict_parser.add_argument('--no_save', help='to show images', action='store_true') - predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150) + predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150) predict_parser.add_argument('--long-edge', default=None, type=int, help='rescale the long side of the image (aspect ratio maintained)') predict_parser.add_argument('--white-overlay', @@ -45,15 +45,20 @@ def cli(): show.cli(parser) visualizer.cli(parser) - predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono') - predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load') - predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode') - predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization') + # Monoloco + predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand') + predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp') + predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True) + predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512) + predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization', + default='data/arrays/names-kitti-200615-1022.json') + predict_parser.add_argument('--transform', help='transformation for the pose', default='None') predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100) predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0) predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2) predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true') - + predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true') + predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image') # Social distancing and social interactions predict_parser.add_argument('--social_distance', help='social', action='store_true') predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25) @@ -122,8 +127,16 @@ def cli(): def main(): args = cli() if args.command == 'predict': - from .predict import predict - predict(args) + if args.webcam: + if 'json'in args.output_types: + args.output_types = 'multi' + if args.z_max == 100: + args.z_max = 10 + from .visuals.webcam import webcam + webcam(args) + else: + from .predict import predict + predict(args) elif args.command == 'prep': if 'nuscenes' in args.dataset: diff --git a/monoloco/visuals/pifpaf_show.py b/monoloco/visuals/pifpaf_show.py index fc7811e..b0b309f 100644 --- a/monoloco/visuals/pifpaf_show.py +++ b/monoloco/visuals/pifpaf_show.py @@ -2,6 +2,7 @@ # File adapted from https://github.com/vita-epfl/openpifpaf from contextlib import contextmanager +import math import numpy as np from PIL import Image @@ -9,6 +10,7 @@ from PIL import Image try: import matplotlib import matplotlib.pyplot as plt + from matplotlib.patches import Circle, FancyArrow import scipy.ndimage as ndimage except ImportError: matplotlib = None @@ -72,12 +74,13 @@ def load_image(path, scale=1.0): class KeypointPainter(object): def __init__(self, *, skeleton=None, - xy_scale=1.0, highlight=None, highlight_invisible=False, + xy_scale=1.0, y_scale=1.0, highlight=None, highlight_invisible=False, show_box=True, linewidth=2, markersize=3, color_connections=False, solid_threshold=0.5): self.skeleton = skeleton or COCO_PERSON_SKELETON self.xy_scale = xy_scale + self.y_scale = y_scale self.highlight = highlight self.highlight_invisible = highlight_invisible self.show_box = show_box @@ -87,22 +90,29 @@ class KeypointPainter(object): self.solid_threshold = solid_threshold self.dashed_threshold = 0.1 # Patch to still allow force complete pose (set to zero to resume original) - def _draw_skeleton(self, ax, x, y, v, *, color=None): + def _draw_skeleton(self, ax, x, y, v, *, color=None, raise_hand='none'): if not np.any(v > 0): return if self.skeleton is not None: for ci, connection in enumerate(np.array(self.skeleton) - 1): c = color + linewidth=self.linewidth + if ((connection[0] == 5 and connection[1] == 7) or (connection[0] == 7 and connection[1] == 9)) and raise_hand in ['left','both']: + c = 'yellow' + linewidth = np.sqrt((x[9]-x[7])**2 + (y[9]-y[7])**2) + if ((connection[0] == 6 and connection[1] == 8) or (connection[0] == 8 and connection[1] == 10)) and raise_hand in ['right', 'both']: + c = 'yellow' + linewidth = np.sqrt((x[9]-x[7])**2 + (y[9]-y[7])**2) if self.color_connections: c = matplotlib.cm.get_cmap('tab20')(ci / len(self.skeleton)) if np.all(v[connection] > self.dashed_threshold): ax.plot(x[connection], y[connection], - linewidth=self.linewidth, color=c, + linewidth=linewidth, color=c, linestyle='dashed', dash_capstyle='round') if np.all(v[connection] > self.solid_threshold): ax.plot(x[connection], y[connection], - linewidth=self.linewidth, color=c, solid_capstyle='round') + linewidth=linewidth, color=c, solid_capstyle='round') # highlight invisible keypoints inv_color = 'k' if self.highlight_invisible else color @@ -169,7 +179,7 @@ class KeypointPainter(object): matplotlib.patches.Rectangle( (x - scale, y - scale), 2 * scale, 2 * scale, fill=False, color=color)) - def keypoints(self, ax, keypoint_sets, *, scores=None, color=None, colors=None, texts=None): + def keypoints(self, ax, keypoint_sets, *, scores=None, color=None, colors=None, texts=None, raise_hand='none'): if keypoint_sets is None: return @@ -181,7 +191,7 @@ class KeypointPainter(object): for i, kps in enumerate(np.asarray(keypoint_sets)): assert kps.shape[1] == 3 x = kps[:, 0] * self.xy_scale - y = kps[:, 1] * self.xy_scale + y = kps[:, 1] * self.xy_scale * self.y_scale v = kps[:, 2] if colors is not None: @@ -190,7 +200,11 @@ class KeypointPainter(object): if isinstance(color, (int, np.integer)): color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20) - self._draw_skeleton(ax, x, y, v, color=color) + self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i]) + score = scores[i] if scores is not None else None + z_str = str(score).split(sep='.') + text = z_str[0] + '.' + z_str[1][0] + self._draw_text(ax, x-2, y, v, text, color) if self.show_box: score = scores[i] if scores is not None else None self._draw_box(ax, x, y, v, color, score) @@ -334,3 +348,79 @@ def white_screen(ax, alpha=0.9): plt.Rectangle((0, 0), 1, 1, transform=ax.transAxes, alpha=alpha, facecolor='white') ) + + +def get_pifpaf_outputs(annotations): + # TODO extract direct from predictions with pifpaf 0.11+ + """Extract keypoints sets and scores from output dictionary""" + if not annotations: + return [], [] + keypoints_sets = np.array([dic['keypoints'] + for dic in annotations]).reshape((-1, 17, 3)) + score_weights = np.ones((keypoints_sets.shape[0], 17)) + score_weights[:, 3] = 3.0 + score_weights /= np.sum(score_weights[0, :]) + kps_scores = keypoints_sets[:, :, 2] + ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1] + scores = np.sum(score_weights * ordered_kps_scores, axis=1) + return keypoints_sets, scores + + +def draw_orientation(ax, centers, sizes, angles, colors, mode): + + if mode == 'front': + length = 5 + fill = False + alpha = 0.6 + zorder_circle = 0.5 + zorder_arrow = 5 + linewidth = 1.5 + edgecolor = 'k' + radiuses = [s / 1.2 for s in sizes] + else: + length = 1.3 + head_width = 0.3 + linewidth = 2 + radiuses = [0.2] * len(centers) + # length = 1.6 + # head_width = 0.4 + # linewidth = 2.7 + radiuses = [0.2] * len(centers) + fill = True + alpha = 1 + zorder_circle = 2 + zorder_arrow = 1 + + for idx, theta in enumerate(angles): + color = colors[idx] + radius = radiuses[idx] + + if mode == 'front': + x_arr = centers[idx][0] + (length + radius) * math.cos(theta) + z_arr = length + centers[idx][1] + \ + (length + radius) * math.sin(theta) + delta_x = math.cos(theta) + delta_z = math.sin(theta) + head_width = max(10, radiuses[idx] / 1.5) + + else: + edgecolor = color + x_arr = centers[idx][0] + z_arr = centers[idx][1] + delta_x = length * math.cos(theta) + # keep into account kitti convention + delta_z = - length * math.sin(theta) + + circle = Circle(centers[idx], radius=radius, color=color, + fill=fill, alpha=alpha, zorder=zorder_circle) + arrow = FancyArrow(x_arr, z_arr, delta_x, delta_z, head_width=head_width, edgecolor=edgecolor, + facecolor=color, linewidth=linewidth, zorder=zorder_arrow) + ax.add_patch(circle) + ax.add_patch(arrow) + + +def social_distance_colors(colors, dic_out): + # Prepare color for social distancing + colors = ['r' if flag else colors[idx] for idx,flag in enumerate(dic_out['social_distance'])] + return colors + diff --git a/monoloco/visuals/printer.py b/monoloco/visuals/printer.py index b8c21be..7e52843 100644 --- a/monoloco/visuals/printer.py +++ b/monoloco/visuals/printer.py @@ -8,6 +8,7 @@ from collections import OrderedDict import matplotlib.pyplot as plt from matplotlib.patches import Rectangle +from .pifpaf_show import KeypointPainter, get_pifpaf_outputs, draw_orientation, social_distance_colors from ..utils import pixel_to_camera @@ -59,21 +60,25 @@ class Printer: self.kk = kk self.output_types = args.output_types self.z_max = args.z_max # set max distance to show instances - self.show = args.show - self.show_all = args.show_all - self.save = not args.no_save + self.show_all = args.show_all or args.webcam + self.show = args.show_all or args.webcam + self.save = not args.no_save and not args.webcam + self.plt_close = not args.webcam + self.args = args # define image attributes self.attr = image_attributes(args.dpi, args.output_types) def _process_results(self, dic_ann): # Include the vectors inside the interval given by z_max + self.angles = dic_ann['angles'] self.stds_ale = dic_ann['stds_ale'] self.stds_epi = dic_ann['stds_epi'] self.gt = dic_ann['gt'] # regulate ground-truth matching self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']] self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']] + self.xz_centers = [[xx[0], xx[2]] for xx in dic_ann['xyz_pred']] # Set maximum distance self.dd_pred = dic_ann['dds_pred'] self.dd_real = dic_ann['dds_real'] @@ -86,6 +91,10 @@ class Printer: for idx, xx in enumerate(dic_ann['xyz_pred'])] self.uv_heads = dic_ann['uv_heads'] + self.centers = self.uv_heads + if 'multi' in self.output_types: + for center in self.centers: + center[1] = center[1] * self.y_scale self.uv_shoulders = dic_ann['uv_shoulders'] self.boxes = dic_ann['boxes'] self.boxes_gt = dic_ann['boxes_gt'] @@ -107,7 +116,8 @@ class Printer: figures = [] # Process the annotation dictionary of monoloco - self._process_results(dic_out) + if dic_out: + self._process_results(dic_out) # Initialize multi figure, resizing it for aesthetic proportion if 'multi' in self.output_types: @@ -165,7 +175,31 @@ class Printer: axes.append(ax1) return figures, axes - def draw(self, figures, axes, image): + + def social_distance_front(self, axis, colors, annotations, dic_out): + sizes = [abs(self.centers[idx][1] - uv_s[1]*self.y_scale) / 1.5 for idx, uv_s in + enumerate(self.uv_shoulders)] + + keypoint_sets, _ = get_pifpaf_outputs(annotations) + keypoint_painter = KeypointPainter(show_box=False, y_scale=self.y_scale) + r_h = 'none' + if 'raise_hand' in self.args.activities: + r_h = dic_out['raising_hand'] + keypoint_painter.keypoints( + axis, keypoint_sets, scores=self.dd_pred,colors=colors, raise_hand=r_h) + draw_orientation(axis, self.centers, + sizes, self.angles, colors, mode='front') + + + def social_distance_bird(self, axis, colors): + draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird') + + def draw(self, figures, axes, image, dic_out, annotations): + + if self.args.activities: + colors = ['deepskyblue' for _ in self.uv_heads] + if 'social_distance' in self.args.activities: + colors = social_distance_colors(colors, dic_out) # whether to include instances that don't match the ground-truth iterator = range(len(self.zz_pred)) if self.show_all else range(len(self.zz_gt)) @@ -176,13 +210,20 @@ class Printer: number = dict(flag=False, num=97) if any(xx in self.output_types for xx in ['front', 'multi']): number['flag'] = True # add numbers - self.mpl_im0.set_data(image) + if not self.args.activities or 'social_distance' not in self.args.activities: + self.mpl_im0.set_data(image) for idx in iterator: if any(xx in self.output_types for xx in ['front', 'multi']) and self.zz_pred[idx] > 0: - self._draw_front(axes[0], - self.dd_pred[idx], - idx, - number) + if self.args.activities: + if 'social_distance' in self.args.activities: + self.social_distance_front(axes[0], colors, annotations, dic_out) + elif 'raise_hand' in self.args.activities: + self.social_distance_front(axes[0], colors, annotations, dic_out) + else: + self._draw_front(axes[0], + self.dd_pred[idx], + idx, + number) number['num'] += 1 # Draw the bird figure @@ -190,6 +231,9 @@ class Printer: for idx in iterator: if any(xx in self.output_types for xx in ['bird', 'multi']) and self.zz_pred[idx] > 0: + if self.args.activities: + if 'social_distance' in self.args.activities: + self.social_distance_bird(axes[1], colors) # Draw ground truth and uncertainty self._draw_uncertainty(axes, idx) @@ -206,7 +250,10 @@ class Printer: fig.savefig(self.output_path + self.extensions[idx], bbox_inches='tight', dpi=self.attr['dpi']) if self.show: fig.show() - plt.close(fig) + if self.plt_close: + plt.close(fig) + + def _draw_front(self, ax, z, idx, number): @@ -360,7 +407,8 @@ class Printer: ax.set_axis_off() ax.set_xlim(0, self.width) ax.set_ylim(self.height, 0) - self.mpl_im0 = ax.imshow(self.im) + if not self.args.activities or 'social_distance' not in self.args.activities: + self.mpl_im0 = ax.imshow(self.im) ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) diff --git a/monoloco/visuals/webcam.py b/monoloco/visuals/webcam.py new file mode 100644 index 0000000..bfa1cc3 --- /dev/null +++ b/monoloco/visuals/webcam.py @@ -0,0 +1,196 @@ +# pylint: disable=W0212 +""" +Webcam demo application + +Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/webcam.py + +""" + +import time +import os + +import torch +import matplotlib.pyplot as plt +from PIL import Image +import cv2 + +from openpifpaf import decoder, network, visualizer, show +import openpifpaf.datasets as datasets +from openpifpaf.predict import processor_factory, preprocess_factory + +from ..visuals import Printer +from ..network import Loco +from ..network.process import preprocess_pifpaf, factory_for_gt + +OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl' + + +def factory_from_args(args): + + # Model + if not args.checkpoint: + if os.path.exists(OPENPIFPAF_PATH): + args.checkpoint = OPENPIFPAF_PATH + else: + args.checkpoint = 'shufflenetv2k30' + + # Devices + args.device = torch.device('cpu') + args.pin_memory = False + if torch.cuda.is_available(): + args.device = torch.device('cuda') + args.pin_memory = True + + # Add visualization defaults + args.figure_width = 10 + args.dpi_factor = 1.0 + + if args.net == 'monstereo': + args.batch_size = 2 + else: + args.batch_size = 1 + + # Make default pifpaf argument + args.force_complete_pose = True + + # Configure + decoder.configure(args) + network.Factory.configure(args) + show.configure(args) + visualizer.configure(args) + + return args + + +def webcam(args): + + args = factory_from_args(args) + # Load Models + net = Loco(model=args.model, net=args.net, device=args.device, + n_dropout=args.n_dropout, p_dropout=args.dropout) + + processor, model = processor_factory(args) + preprocess = preprocess_factory(args) + + # Start recording + cam = cv2.VideoCapture(0) + visualizer_monstereo = None + + while True: + start = time.time() + ret, frame = cam.read() + image = cv2.resize(frame, None, fx=args.scale, fy=args.scale) + height, width, _ = image.shape + print('resized image size: {}'.format(image.shape)) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(image) + + data = datasets.PilImageList( + make_list(pil_image), preprocess=preprocess) + + data_loader = torch.utils.data.DataLoader( + data, batch_size=1, shuffle=False, + pin_memory=False, collate_fn=datasets.collate_images_anns_meta) + + for (image_tensors_batch, _, meta_batch) in data_loader: + pred_batch = processor.batch( + model, image_tensors_batch, device=args.device) + + for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)): + pred = [ann.inverse_transform(meta) for ann in pred] + + if idx == 0: + pifpaf_outs = { + 'pred': pred, + 'left': [ann.json_data() for ann in pred], + 'image': image} + else: + pifpaf_outs['right'] = [ann.json_data() for ann in pred] + + if not ret: + break + key = cv2.waitKey(1) + if key % 256 == 27: + # ESC pressed + print("Escape hit, closing...") + break + intrinsic_size = [xx * 1.3 for xx in pil_image.size] + kk, dic_gt = factory_for_gt(intrinsic_size, + focal_length=args.focal, + path_gt=args.path_gt) # better intrinsics for mac camera + boxes, keypoints = preprocess_pifpaf( + pifpaf_outs['left'], (width, height)) + + dic_out = net.forward(keypoints, kk) + dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt) + + if args.activities: + if 'social_distance' in args.activities: + dic_out = net.social_distance(dic_out, args) + if 'raise_hand' in args.activities: + dic_out = net.raising_hand(dic_out, keypoints) + if visualizer_monstereo is None: # it is, at the beginning + visualizer_monstereo = VisualizerMonstereo(kk, + args)(pil_image) # create it with the first image + visualizer_monstereo.send(None) + + print(dic_out) + visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs)) + + end = time.time() + print("run-time: {:.2f} ms".format((end-start)*1000)) + + cam.release() + + cv2.destroyAllWindows() + + +class VisualizerMonstereo: + def __init__(self, kk, args): + self.kk = kk + self.args = args + + def __call__(self, first_image, fig_width=1.0, **kwargs): + if 'figsize' not in kwargs: + kwargs['figsize'] = (fig_width, fig_width * + first_image.size[0] / first_image.size[1]) + + printer = Printer(first_image, output_path="", + kk=self.kk, args=self.args) + + figures, axes = printer.factory_axes(None) + + for fig in figures: + fig.show() + + while True: + image, dic_out, pifpaf_outs = yield + + # Clears previous annotations between frames + axes[0].patches = [] + axes[0].lines = [] + axes[0].texts = [] + if len(axes) > 1: + axes[1].patches = [] + axes[1].lines = [axes[1].lines[0], axes[1].lines[1]] + axes[1].texts = [] + + if dic_out: + printer._process_results(dic_out) + printer.draw(figures, axes, image, dic_out, pifpaf_outs['left']) + mypause(0.01) + + +def mypause(interval): + manager = plt._pylab_helpers.Gcf.get_active() + if manager is not None: + canvas = manager.canvas + if canvas.figure.stale: + canvas.draw_idle() + canvas.start_event_loop(interval) + else: + time.sleep(interval) + + +def make_list(*args): + return list(args)