diff --git a/README.md b/README.md
index c9106d3..af7ee48 100644
--- a/README.md
+++ b/README.md
@@ -102,6 +102,28 @@ When processing KITTI images, the network uses the provided intrinsic matrix of
 In all the other cases, we use the parameters of nuScenes cameras, with "1/1.8'' CMOS sensors of size 7.2 x 5.4 mm.
 The default focal length is 5.7mm and this parameter can be modified using the argument `--focal`.
 
+## Webcam
+
+You can use a webcam as input with the `--webcam` argument. When the webcam is used, `--z_max` defaults to 10 and `--long-edge` to 144. If multiple webcams are plugged in, you can choose between them with `--camera`; for instance, add `--camera 1` to use the second camera.
+A few examples are shown below, obtained with the following commands.
+
+For the first and last visualizations:
+```
+python -m monoloco.run predict \
+--webcam \
+--activities raise_hand
+```
+For the second one:
+```
+python -m monoloco.run predict \
+--webcam \
+--activities raise_hand social_distance
+```
+
+![webcam](docs/webcam.gif)
+
+With `social_distance` in `--activities`, only the keypoints will be shown, with no image, allowing total anonymity.
+
 ## A) 3D Localization
 
 **Ground-truth comparison**
@@ -165,7 +187,7 @@ python3 -m monoloco.run predict --glob docs/005523*.png \
 --output_types multi \
 
 ![Occluded hard example](docs/out_005523.png.multi.jpg)
 
 ## B) Social Distancing (and Talking activity)
-To visualize social distancing compliance, simply add the argument `--social-distance` to the predict command. This visualization is not supported with a stereo camera.
+To visualize social distancing compliance, simply add `social_distance` to the `--activities` argument. This visualization is not supported with a stereo camera.
 
 Threshold distance and radii (for F-formations) can be set using `--threshold-dist` and `--radii`, respectively.
 
 For more info, run:
@@ -180,13 +202,31 @@ To visualize social distancing run the below, command:
 ```sh
 python -m monoloco.run predict docs/frame0032.jpg \
---social_distance --output_types front bird
+--activities social_distance --output_types front bird
 ```
 
+## C) Hand-raising detection
+To detect a raised hand, add `raise_hand` to `--activities`.
 
-## C) Orientation and Bounding Box dimensions
+For more info, run:
+`python -m monoloco.run predict --help`
+
+**Examples**
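+
+A live demo of the same activity is also possible with a webcam (see the Webcam section above; add `--camera 1` to select a second device):
+```
+python -m monoloco.run predict \
+--webcam \
+--activities raise_hand
+```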
+
+The command below
+```
+python -m monoloco.run predict docs/raising_hand.jpg \
+--output_types front \
+--activities raise_hand
+```
+yields the following:
+
+![raise_hand_taxi](docs/out_raising_hand.jpg.front.png)
+
+
+## D) Orientation and Bounding Box dimensions
 The network estimates orientation and box dimensions as well. Results are saved in a json file when using the command `--output_types json`. At the moment, the only visualization including orientation is the social distancing one.
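+
+As a quick way to inspect those results, the saved json can be loaded directly. A minimal sketch (the output file name and the exact keys depend on your input image and version, so treat both as assumptions and check your own file):
+```python
+import json
+
+with open('out_raising_hand.jpg.monoloco.json') as f:  # hypothetical output name
+    dic_out = json.load(f)
+
+# e.g. orientation angles and 3D positions, if present
+for angle, xyz in zip(dic_out.get('angles', []), dic_out.get('xyz_pred', [])):
+    print('angle: {:.2f} rad, xyz: {}'.format(angle, xyz))
+```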
@@ -411,4 +451,4 @@ When using this library in your research, we will be happy if you cite us!
         month = {October},
         year = {2019}
 }
-```
\ No newline at end of file
+```
diff --git a/docs/out_raising_hand.jpg.front.png b/docs/out_raising_hand.jpg.front.png
new file mode 100644
index 0000000..3b1590e
Binary files /dev/null and b/docs/out_raising_hand.jpg.front.png differ
diff --git a/docs/raising_hand.jpg b/docs/raising_hand.jpg
new file mode 100644
index 0000000..512aa78
Binary files /dev/null and b/docs/raising_hand.jpg differ
diff --git a/docs/webcam.gif b/docs/webcam.gif
new file mode 100644
index 0000000..594b2dc
Binary files /dev/null and b/docs/webcam.gif differ
diff --git a/monoloco/activity.py b/monoloco/activity.py
index df43d75..5a3149e 100644
--- a/monoloco/activity.py
+++ b/monoloco/activity.py
@@ -8,10 +8,11 @@ from contextlib import contextmanager
 import numpy as np
 import torch
 import matplotlib.pyplot as plt
-from matplotlib.patches import Circle, FancyArrow
 
 from .network.process import laplace_sampling
-from .visuals.pifpaf_show import KeypointPainter, image_canvas
+from .visuals.pifpaf_show import (
+    KeypointPainter, image_canvas, get_pifpaf_outputs, draw_orientation, social_distance_colors
+)
 
 
 def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,
@@ -23,9 +24,11 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa
     # A) Check whether people are close together
     xx = centers[idx][0]
     zz = centers[idx][1]
-    distances = [math.sqrt((xx - centers[i][0]) ** 2 + (zz - centers[i][1]) ** 2) for i, _ in enumerate(centers)]
+    distances = [math.sqrt((xx - centers[i][0]) ** 2 + (zz - centers[i][1]) ** 2)
+                 for i, _ in enumerate(centers)]
     sorted_idxs = np.argsort(distances)
-    indices = [idx_t for idx_t in sorted_idxs[1:] if distances[idx_t] <= threshold_dist]
+    indices = [idx_t for idx_t in sorted_idxs[1:]
+               if distances[idx_t] <= threshold_dist]
 
     # B) Check whether people are looking inwards and whether there are no intrusions
     # Deterministic
@@ -65,6 +68,56 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa
     return False
 
 
+def is_raising_hand(kp):
+    """
+    Return 'left', 'right' or 'both' when the corresponding hand is raised, None otherwise.
+    kp holds one COCO-17 pose: kp[0] are the x and kp[1] the y pixel coordinates
+    (image y grows downwards).
+    """
+    x = 0
+    y = 1
+
+    # COCO keypoint indices
+    nose = 0
+    l_ear = 3
+    l_shoulder = 5
+    l_elbow = 7
+    l_hand = 9
+    r_ear = 4
+    r_shoulder = 6
+    r_elbow = 8
+    r_hand = 10
+
+    # Rough head extent: one ear-to-ear width stacked above the nose
+    head_width = kp[x][l_ear] - kp[x][r_ear]
+    head_top = kp[y][nose] - head_width
+
+    l_forearm = np.array([kp[x][l_hand] - kp[x][l_elbow], kp[y][l_hand] - kp[y][l_elbow]])
+    l_arm = np.array([kp[x][l_shoulder] - kp[x][l_elbow], kp[y][l_shoulder] - kp[y][l_elbow]])
+
+    r_forearm = np.array([kp[x][r_hand] - kp[x][r_elbow], kp[y][r_hand] - kp[y][r_elbow]])
+    r_arm = np.array([kp[x][r_shoulder] - kp[x][r_elbow], kp[y][r_shoulder] - kp[y][r_elbow]])
+
+    # Elbow angle between the upper arm and the forearm, in degrees
+    l_angle = np.degrees(np.arccos(np.dot(
+        l_forearm / np.linalg.norm(l_forearm), l_arm / np.linalg.norm(l_arm))))
+    r_angle = np.degrees(np.arccos(np.dot(
+        r_forearm / np.linalg.norm(r_forearm), r_arm / np.linalg.norm(r_arm))))
+
+    # The hand must be above its shoulder
+    is_l_up = kp[y][l_hand] < kp[y][l_shoulder]
+    is_r_up = kp[y][r_hand] < kp[y][r_shoulder]
+
+    # A hand kept close to the head/face does not count as raised
+    l_too_close = kp[x][l_hand] <= kp[x][l_shoulder] and kp[y][l_hand] >= head_top
+    r_too_close = kp[x][r_hand] >= kp[x][r_shoulder] and kp[y][r_hand] >= head_top
+
+    is_left_risen = is_l_up and l_angle >= 60 and not l_too_close
+    is_right_risen = is_r_up and r_angle >= 60 and not r_too_close
+
+    if is_left_risen and is_right_risen:
+        return 'both'
+    if is_left_risen:
+        return 'left'
+    if is_right_risen:
+        return 'right'
+    return None
+
+
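+# Usage sketch with hypothetical pixel coordinates (`kp` follows the convention
+# above: kp[0] holds the 17 x and kp[1] the 17 y values of one COCO pose,
+# with image y growing downwards):
+#
+#     import numpy as np
+#     kp = np.zeros((3, 17))             # rows: x, y, confidence
+#     kp[0, 5], kp[1, 5] = 300., 200.    # left shoulder
+#     kp[0, 7], kp[1, 7] = 360., 150.    # left elbow
+#     kp[0, 9], kp[1, 9] = 310., 80.     # left wrist, above the shoulder
+#     kp[0, 6], kp[1, 6] = 200., 200.    # right shoulder
+#     kp[0, 8], kp[1, 8] = 195., 260.    # right elbow
+#     kp[0, 10], kp[1, 10] = 190., 320.  # right wrist, hanging down
+#     is_raising_hand(kp)                # -> 'left'
+
+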
def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False): """ Check F-formations for people close together (this function do not expect far away people): @@ -73,7 +126,8 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False """ # Extract centers and angles - other_centers = np.array([cent for l, cent in enumerate(centers) if l not in (idx, idx_t)]) + other_centers = np.array( + [cent for l, cent in enumerate(centers) if l not in (idx, idx_t)]) theta0 = angles[idx] theta1 = angles[idx_t] @@ -92,15 +146,18 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False # 1) Verify they are looking inwards. # The distance between mus and the center should be less wrt the original position and the center - d_new = np.linalg.norm(mu_0 - mu_1) / 2 if social_distance else np.linalg.norm(mu_0 - mu_1) + d_new = np.linalg.norm( + mu_0 - mu_1) / 2 if social_distance else np.linalg.norm(mu_0 - mu_1) d_0 = np.linalg.norm(x_0 - o_c) d_1 = np.linalg.norm(x_1 - o_c) # 2) Verify no intrusion for third parties if other_centers.size: - other_distances = np.linalg.norm(other_centers - o_c.reshape(1, -1), axis=1) + other_distances = np.linalg.norm( + other_centers - o_c.reshape(1, -1), axis=1) else: - other_distances = 100 * np.ones((1, 1)) # Condition verified if no other people + # Condition verified if no other people + other_distances = 100 * np.ones((1, 1)) # Binary Classification # if np.min(other_distances) > radius: # Ablation without orientation @@ -109,18 +166,19 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False return False -def show_social(args, image_t, output_path, annotations, dic_out): +def show_activities(args, image_t, output_path, annotations, dic_out): """Output frontal image with poses or combined with bird eye view""" assert 'front' in args.output_types or 'bird' in args.output_types, "outputs allowed: front and/or bird" + colors = ['deepskyblue' for _ in dic_out['uv_heads']] + if 'social_distance' in args.activities: + colors = social_distance_colors(colors, dic_out) + angles = dic_out['angles'] stds = dic_out['stds_ale'] xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']] - # Prepare color for social distancing - colors = ['r' if flag else 'deepskyblue' for flag in dic_out['social_distance']] - # Draw keypoints and orientation if 'front' in args.output_types: keypoint_sets, _ = get_pifpaf_outputs(annotations) @@ -134,8 +192,11 @@ def show_social(args, image_t, output_path, annotations, dic_out): show=args.show, fig_width=10, dpi_factor=1.0) as ax: - keypoint_painter.keypoints(ax, keypoint_sets, colors=colors) - draw_orientation(ax, uv_centers, sizes, angles, colors, mode='front') + keypoint_painter.keypoints( + ax, keypoint_sets, activities=args.activities, dic_out=dic_out, + size=image_t.size, colors=colors) + draw_orientation(ax, uv_centers, sizes, + angles, colors, mode='front') if 'bird' in args.output_types: z_max = min(args.z_max, 4 + max([el[1] for el in xz_centers])) @@ -144,21 +205,6 @@ def show_social(args, image_t, output_path, annotations, dic_out): draw_uncertainty(ax1, xz_centers, stds) -def get_pifpaf_outputs(annotations): - # TODO extract direct from predictions with pifpaf 0.11+ - """Extract keypoints sets and scores from output dictionary""" - if not annotations: - return [], [] - keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape((-1, 17, 3)) - score_weights = np.ones((keypoints_sets.shape[0], 17)) - score_weights[:, 3] = 3.0 - 
score_weights /= np.sum(score_weights[0, :]) - kps_scores = keypoints_sets[:, :, 2] - ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1] - scores = np.sum(score_weights * ordered_kps_scores, axis=1) - return keypoints_sets, scores - - @contextmanager def bird_canvas(output_path, z_max): fig, ax = plt.subplots(1, 1) @@ -174,56 +220,6 @@ def bird_canvas(output_path, z_max): print('Bird-eye-view image saved') -def draw_orientation(ax, centers, sizes, angles, colors, mode): - - if mode == 'front': - length = 5 - fill = False - alpha = 0.6 - zorder_circle = 0.5 - zorder_arrow = 5 - linewidth = 1.5 - edgecolor = 'k' - radiuses = [s / 1.2 for s in sizes] - else: - length = 1.3 - head_width = 0.3 - linewidth = 2 - radiuses = [0.2] * len(centers) - # length = 1.6 - # head_width = 0.4 - # linewidth = 2.7 - radiuses = [0.2] * len(centers) - fill = True - alpha = 1 - zorder_circle = 2 - zorder_arrow = 1 - - for idx, theta in enumerate(angles): - color = colors[idx] - radius = radiuses[idx] - - if mode == 'front': - x_arr = centers[idx][0] + (length + radius) * math.cos(theta) - z_arr = length + centers[idx][1] + (length + radius) * math.sin(theta) - delta_x = math.cos(theta) - delta_z = math.sin(theta) - head_width = max(10, radiuses[idx] / 1.5) - - else: - edgecolor = color - x_arr = centers[idx][0] - z_arr = centers[idx][1] - delta_x = length * math.cos(theta) - delta_z = - length * math.sin(theta) # keep into account kitti convention - - circle = Circle(centers[idx], radius=radius, color=color, fill=fill, alpha=alpha, zorder=zorder_circle) - arrow = FancyArrow(x_arr, z_arr, delta_x, delta_z, head_width=head_width, edgecolor=edgecolor, - facecolor=color, linewidth=linewidth, zorder=zorder_arrow) - ax.add_patch(circle) - ax.add_patch(arrow) - - def draw_uncertainty(ax, centers, stds): for idx, std in enumerate(stds): std = stds[idx] diff --git a/monoloco/network/net.py b/monoloco/network/net.py index 697261d..b078b93 100644 --- a/monoloco/network/net.py +++ b/monoloco/network/net.py @@ -16,7 +16,7 @@ from ..utils import get_iou_matches, reorder_matches, get_keypoints, pixel_to_ca mask_joint_disparity from .process import preprocess_monstereo, preprocess_monoloco, extract_outputs, extract_outputs_mono,\ filter_outputs, cluster_outputs, unnormalize_bi, laplace_sampling -from ..activity import social_interactions +from ..activity import social_interactions, is_raising_hand from .architectures import MonolocoModel, LocoModel @@ -266,6 +266,12 @@ class Loco: return dic_out + @staticmethod + def raising_hand(dic_out, keypoints): + dic_out['raising_hand'] = [is_raising_hand(keypoint) for keypoint in keypoints] + return dic_out + + def median_disparity(dic_out, keypoints, keypoints_r, mask): """ Ablation study: whenever a matching is found, compute depth by median disparity instead of using MonSter diff --git a/monoloco/predict.py b/monoloco/predict.py index 66eabcf..d426431 100644 --- a/monoloco/predict.py +++ b/monoloco/predict.py @@ -28,7 +28,7 @@ except ImportError: from .visuals.printer import Printer from .network import Loco from .network.process import factory_for_gt, preprocess_pifpaf -from .activity import show_social +from .activity import show_activities LOG = logging.getLogger(__name__) @@ -75,7 +75,7 @@ def download_checkpoints(args): assert not args.social_distance, "Social distance not supported in stereo modality" path = MONSTEREO_MODEL name = 'monstereo-201202-1212.pkl' - elif args.social_distance: + elif ('social_distance' in args.activities) or args.webcam: path = MONOLOCO_MODEL_NU 
name = 'monoloco_pp-201207-1350.pkl' else: @@ -167,14 +167,16 @@ def predict(args): # data data = datasets.ImageList(args.images, preprocess=preprocess) if args.mode == 'stereo': - assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting" + assert len( + data.image_paths) % 2 == 0, "Odd number of images in a stereo setting" data_loader = torch.utils.data.DataLoader( data, batch_size=args.batch_size, shuffle=False, pin_memory=False, collate_fn=datasets.collate_images_anns_meta) for batch_i, (image_tensors_batch, _, meta_batch) in enumerate(data_loader): - pred_batch = processor.batch(pifpaf_model, image_tensors_batch, device=args.device) + pred_batch = processor.batch( + pifpaf_model, image_tensors_batch, device=args.device) # unbatch (only for MonStereo) for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)): @@ -196,7 +198,8 @@ def predict(args): output_path = os.path.join(splits[0], 'out_' + splits[1]) else: file_name = os.path.basename(meta['file_name']) - output_path = os.path.join(args.output_directory, 'out_' + file_name) + output_path = os.path.join( + args.output_directory, 'out_' + file_name) im_name = os.path.basename(meta['file_name']) print(f'{batch_i} image {im_name} saved as {output_path}') @@ -208,23 +211,29 @@ def predict(args): # 3D Predictions if args.mode != 'keypoints': im_size = (cpu_image.size[0], cpu_image.size[1]) # Original - kk, dic_gt = factory_for_gt(im_size, focal_length=args.focal, name=im_name, path_gt=args.path_gt) + kk, dic_gt = factory_for_gt( + im_size, focal_length=args.focal, name=im_name, path_gt=args.path_gt) # Preprocess pifpaf outputs and run monoloco - boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False) + boxes, keypoints = preprocess_pifpaf( + pifpaf_outs['left'], im_size, enlarge_boxes=False) if args.mode == 'mono': LOG.info("Prediction with MonoLoco++") dic_out = net.forward(keypoints, kk) - dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt) - if args.social_distance: + dic_out = net.post_process( + dic_out, boxes, keypoints, kk, dic_gt) + if 'social_distance' in args.activities: dic_out = net.social_distance(dic_out, args) + if 'raise_hand' in args.activities: + dic_out = net.raising_hand(dic_out, keypoints) else: LOG.info("Prediction with MonStereo") _, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size) dic_out = net.forward(keypoints, kk, keypoints_r=keypoints_r) - dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt) + dic_out = net.post_process( + dic_out, boxes, keypoints, kk, dic_gt) else: dic_out = defaultdict(list) @@ -245,7 +254,7 @@ def factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=None): else: assert 'json' in args.output_types or args.mode == 'keypoints', \ "No output saved, please select one among front, bird, multi, json, or pifpaf arguments" - if args.social_distance: + if 'social_distance' in args.activities: assert args.mode == 'mono', "Social distancing only works with monocular network" if args.mode == 'keypoints': @@ -256,8 +265,9 @@ def factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=None): if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])): LOG.info(output_path) - if args.social_distance: - show_social(args, pifpaf_outs['image'], output_path, pifpaf_outs['left'], dic_out) + if args.activities: + show_activities( + args, pifpaf_outs['image'], output_path, pifpaf_outs['left'], dic_out) else: printer = Printer(pifpaf_outs['image'], output_path, kk, args) figures, axes = 
printer.factory_axes(dic_out)
diff --git a/monoloco/run.py b/monoloco/run.py
index 59c26c8..418d668 100644
--- a/monoloco/run.py
+++ b/monoloco/run.py
@@ -20,11 +20,13 @@ def cli():
     predict_parser.add_argument('--glob', help='glob expression for input images (for many images)')
     predict_parser.add_argument('--checkpoint', help='pifpaf model')
     predict_parser.add_argument('-o', '--output-directory', help='Output directory')
-    predict_parser.add_argument('--output_types', nargs='+', default=['json'],
+    predict_parser.add_argument('--output_types', nargs='+', default=['multi'],
                                 help='what to output: json keypoints skeleton for Pifpaf'
                                      'json bird front or multi for MonStereo')
     predict_parser.add_argument('--no_save', help='to show images', action='store_true')
-    predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150)
+    predict_parser.add_argument('--hide_distance', help='do not show the absolute distance of people from the camera',
+                                action='store_true')
+    predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150)
     predict_parser.add_argument('--long-edge', default=None, type=int,
                                 help='rescale the long side of the image (aspect ratio maintained)')
     predict_parser.add_argument('--white-overlay',
@@ -47,19 +49,24 @@ def cli():
     show.cli(parser)
     visualizer.cli(parser)
 
+    # Monoloco
+    predict_parser.add_argument('--activities', nargs='+', choices=['raise_hand', 'social_distance'],
+                                help='Choose activities to show: social_distance, raise_hand', default=[])
     predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
     predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
     predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
     predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
+    # default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
+    predict_parser.add_argument('--webcam', help='read frames from a webcam and show predictions live', action='store_true')
+    predict_parser.add_argument('--camera', help='device to use for webcam streaming', type=int, default=0)
     predict_parser.add_argument('--focal', help='focal length in mm for a sensor size of 7.2x5.4 mm. 
(nuScenes)', type=float, default=5.7) # Social distancing and social interactions - predict_parser.add_argument('--social_distance', help='social', action='store_true') predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25) predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2.5) predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.3, 0.5, 1)) @@ -127,8 +134,12 @@ def cli(): def main(): args = cli() if args.command == 'predict': - from .predict import predict - predict(args) + if args.webcam: + from .visuals.webcam import webcam + webcam(args) + else: + from .predict import predict + predict(args) elif args.command == 'prep': if 'nuscenes' in args.dataset: diff --git a/monoloco/visuals/pifpaf_show.py b/monoloco/visuals/pifpaf_show.py index 43c97df..46bc4dc 100644 --- a/monoloco/visuals/pifpaf_show.py +++ b/monoloco/visuals/pifpaf_show.py @@ -6,16 +6,19 @@ and licensed under GNU AGPLv3 """ from contextlib import contextmanager +import math import numpy as np from PIL import Image -import matplotlib -import matplotlib.pyplot as plt try: + import matplotlib + import matplotlib.pyplot as plt + from matplotlib.patches import Circle, FancyArrow import scipy.ndimage as ndimage except ImportError: ndimage = None + plt = None COCO_PERSON_SKELETON = [ @@ -71,15 +74,41 @@ def load_image(path, scale=1.0): return image +def highlighted_arm(x, y, connection, color, lwidth, raise_hand, size=None): + + c = color + linewidth = lwidth + + width, height = (1,1) + if size: + width = size[0] + height = size[1] + + l_arm_width = np.sqrt(((x[9]-x[7])/width)**2 + ((y[9]-y[7])/height)**2)*100 + r_arm_width = np.sqrt(((x[10]-x[8])/width)**2 + ((y[10]-y[8])/height)**2)*100 + + if ((connection[0] == 5 and connection[1] == 7) + or (connection[0] == 7 and connection[1] == 9)) and raise_hand in ['left','both']: + c = 'yellow' + linewidth = l_arm_width + if ((connection[0] == 6 and connection[1] == 8) + or (connection[0] == 8 and connection[1] == 10)) and raise_hand in ['right', 'both']: + c = 'yellow' + linewidth = r_arm_width + + return c, linewidth + + class KeypointPainter: def __init__(self, *, skeleton=None, - xy_scale=1.0, highlight=None, highlight_invisible=False, + xy_scale=1.0, y_scale=1.0, highlight=None, highlight_invisible=False, show_box=True, linewidth=2, markersize=3, color_connections=False, solid_threshold=0.5): self.skeleton = skeleton or COCO_PERSON_SKELETON self.xy_scale = xy_scale + self.y_scale = y_scale self.highlight = highlight self.highlight_invisible = highlight_invisible self.show_box = show_box @@ -89,22 +118,29 @@ class KeypointPainter: self.solid_threshold = solid_threshold self.dashed_threshold = 0.1 # Patch to still allow force complete pose (set to zero to resume original) - def _draw_skeleton(self, ax, x, y, v, *, color=None): + + def _draw_skeleton(self, ax, x, y, v, *, i=0, size=None, color=None, activities=None, dic_out=None): if not np.any(v > 0): return if self.skeleton is not None: for ci, connection in enumerate(np.array(self.skeleton) - 1): c = color + linewidth = self.linewidth + + if 'raise_hand' in activities: + c, linewidth = highlighted_arm(x, y, connection, c, linewidth, + dic_out['raising_hand'][:][i], size=size) + if self.color_connections: c = matplotlib.cm.get_cmap('tab20')(ci / len(self.skeleton)) if np.all(v[connection] > self.dashed_threshold): ax.plot(x[connection], y[connection], - linewidth=self.linewidth, color=c, + 
linewidth=linewidth, color=c, linestyle='dashed', dash_capstyle='round') if np.all(v[connection] > self.solid_threshold): ax.plot(x[connection], y[connection], - linewidth=self.linewidth, color=c, solid_capstyle='round') + linewidth=linewidth, color=c, solid_capstyle='round') # highlight invisible keypoints inv_color = 'k' if self.highlight_invisible else color @@ -145,7 +181,7 @@ class KeypointPainter: ax.text(x1, y1, '{:.4f}'.format(score), fontsize=8, color=color) @staticmethod - def _draw_text(ax, x, y, v, text, color): + def _draw_text(ax, x, y, v, text, color, fontsize=8): if not np.any(v > 0): return @@ -159,7 +195,7 @@ class KeypointPainter: y1 -= 2.0 y2 += 2.0 - ax.text(x1 + 2, y1 - 2, text, fontsize=8, + ax.text(x1 + 2, y1 - 2, text, fontsize=fontsize, color='white', bbox={'facecolor': color, 'alpha': 0.5, 'linewidth': 0}) @staticmethod @@ -171,7 +207,9 @@ class KeypointPainter: matplotlib.patches.Rectangle( (x - scale, y - scale), 2 * scale, 2 * scale, fill=False, color=color)) - def keypoints(self, ax, keypoint_sets, *, scores=None, color=None, colors=None, texts=None): + def keypoints(self, ax, keypoint_sets, *, + size=None, scores=None, color=None, + colors=None, texts=None, activities=None, dic_out=None): if keypoint_sets is None: return @@ -183,7 +221,7 @@ class KeypointPainter: for i, kps in enumerate(np.asarray(keypoint_sets)): assert kps.shape[1] == 3 x = kps[:, 0] * self.xy_scale - y = kps[:, 1] * self.xy_scale + y = kps[:, 1] * self.xy_scale * self.y_scale v = kps[:, 2] if colors is not None: @@ -192,7 +230,13 @@ class KeypointPainter: if isinstance(color, (int, np.integer)): color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20) - self._draw_skeleton(ax, x, y, v, color=color) + self._draw_skeleton(ax, x, y, v, i=i, size=size, color=color, activities=activities, dic_out=dic_out) + + score = scores[i] if scores is not None else None + if score is not None: + z_str = str(score).split(sep='.') + text = z_str[0] + '.' 
+ z_str[1][0] + self._draw_text(ax, x[1:3], y[1:3]-5, v[1:3], text, color, fontsize=16) if self.show_box: score = scores[i] if scores is not None else None self._draw_box(ax, x, y, v, color, score) @@ -336,3 +380,78 @@ def white_screen(ax, alpha=0.9): plt.Rectangle((0, 0), 1, 1, transform=ax.transAxes, alpha=alpha, facecolor='white') ) + + +def get_pifpaf_outputs(annotations): + # TODO extract direct from predictions with pifpaf 0.11+ + """Extract keypoints sets and scores from output dictionary""" + if not annotations: + return [], [] + keypoints_sets = np.array([dic['keypoints'] + for dic in annotations]).reshape((-1, 17, 3)) + score_weights = np.ones((keypoints_sets.shape[0], 17)) + score_weights[:, 3] = 3.0 + score_weights /= np.sum(score_weights[0, :]) + kps_scores = keypoints_sets[:, :, 2] + ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1] + scores = np.sum(score_weights * ordered_kps_scores, axis=1) + return keypoints_sets, scores + + +def draw_orientation(ax, centers, sizes, angles, colors, mode): + + if mode == 'front': + length = 5 + fill = False + alpha = 0.6 + zorder_circle = 0.5 + zorder_arrow = 5 + linewidth = 1.5 + edgecolor = 'k' + radiuses = [s / 1.2 for s in sizes] + else: + length = 1.3 + head_width = 0.3 + linewidth = 2 + radiuses = [0.2] * len(centers) + # length = 1.6 + # head_width = 0.4 + # linewidth = 2.7 + radiuses = [0.2] * len(centers) + fill = True + alpha = 1 + zorder_circle = 2 + zorder_arrow = 1 + + for idx, theta in enumerate(angles): + color = colors[idx] + radius = radiuses[idx] + + if mode == 'front': + x_arr = centers[idx][0] + (length + radius) * math.cos(theta) + z_arr = length + centers[idx][1] + \ + (length + radius) * math.sin(theta) + delta_x = math.cos(theta) + delta_z = math.sin(theta) + head_width = max(10, radiuses[idx] / 1.5) + + else: + edgecolor = color + x_arr = centers[idx][0] + z_arr = centers[idx][1] + delta_x = length * math.cos(theta) + # keep into account kitti convention + delta_z = - length * math.sin(theta) + + circle = Circle(centers[idx], radius=radius, color=color, + fill=fill, alpha=alpha, zorder=zorder_circle) + arrow = FancyArrow(x_arr, z_arr, delta_x, delta_z, head_width=head_width, edgecolor=edgecolor, + facecolor=color, linewidth=linewidth, zorder=zorder_arrow) + ax.add_patch(circle) + ax.add_patch(arrow) + + +def social_distance_colors(colors, dic_out): + # Prepare color for social distancing + colors = ['r' if flag else colors[idx] for idx,flag in enumerate(dic_out['social_distance'])] + return colors diff --git a/monoloco/visuals/printer.py b/monoloco/visuals/printer.py index b8c21be..7a552d1 100644 --- a/monoloco/visuals/printer.py +++ b/monoloco/visuals/printer.py @@ -8,6 +8,7 @@ from collections import OrderedDict import matplotlib.pyplot as plt from matplotlib.patches import Rectangle +from .pifpaf_show import KeypointPainter, get_pifpaf_outputs, draw_orientation, social_distance_colors from ..utils import pixel_to_camera @@ -51,7 +52,6 @@ class Printer: boxes_gt, uv_camera, radius, auxs = nones(16) def __init__(self, image, output_path, kk, args): - self.im = image self.width = self.im.size[0] self.height = self.im.size[1] @@ -59,21 +59,27 @@ class Printer: self.kk = kk self.output_types = args.output_types self.z_max = args.z_max # set max distance to show instances - self.show = args.show - self.show_all = args.show_all - self.save = not args.no_save + self.webcam = args.webcam + self.show_all = args.show_all or self.webcam + self.show = args.show_all or self.webcam + self.save = not args.no_save and 
not self.webcam + self.plt_close = not self.webcam + self.activities = args.activities + self.hide_distance = args.hide_distance # define image attributes self.attr = image_attributes(args.dpi, args.output_types) def _process_results(self, dic_ann): # Include the vectors inside the interval given by z_max + self.angles = dic_ann['angles'] self.stds_ale = dic_ann['stds_ale'] self.stds_epi = dic_ann['stds_epi'] self.gt = dic_ann['gt'] # regulate ground-truth matching self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']] self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']] + self.xz_centers = [[xx[0], xx[2]] for xx in dic_ann['xyz_pred']] # Set maximum distance self.dd_pred = dic_ann['dds_pred'] self.dd_real = dic_ann['dds_real'] @@ -86,6 +92,10 @@ class Printer: for idx, xx in enumerate(dic_ann['xyz_pred'])] self.uv_heads = dic_ann['uv_heads'] + self.centers = self.uv_heads + if 'multi' in self.output_types: + for center in self.centers: + center[1] = center[1] * self.y_scale self.uv_shoulders = dic_ann['uv_shoulders'] self.boxes = dic_ann['boxes'] self.boxes_gt = dic_ann['boxes_gt'] @@ -103,11 +113,15 @@ class Printer: def factory_axes(self, dic_out): """Create axes for figures: front bird multi""" + + plt.style.use('dark_background') + axes = [] figures = [] # Process the annotation dictionary of monoloco - self._process_results(dic_out) + if dic_out: + self._process_results(dic_out) # Initialize multi figure, resizing it for aesthetic proportion if 'multi' in self.output_types: @@ -129,6 +143,7 @@ class Printer: fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]}, figsize=(fig_width, fig_height)) + ax1.set_aspect(fig_ar_1) fig.set_tight_layout(True) fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02) @@ -165,7 +180,58 @@ class Printer: axes.append(ax1) return figures, axes - def draw(self, figures, axes, image): + + def _webcam_front(self, axis, colors, activities, annotations, dic_out): + sizes = [abs(self.centers[idx][1] - uv_s[1]*self.y_scale) / 1.5 for idx, uv_s in + enumerate(self.uv_shoulders)] + + keypoint_sets, _ = get_pifpaf_outputs(annotations) + keypoint_painter = KeypointPainter(show_box=False, y_scale=self.y_scale) + + if not self.hide_distance: + scores = self.dd_pred + else: + scores=None + + keypoint_painter.keypoints( + axis, keypoint_sets, size=self.im.size, + scores=scores, colors=colors, activities=activities, dic_out=dic_out) + + draw_orientation(axis, self.centers, + sizes, self.angles, colors, mode='front') + + + def _front_loop(self, iterator, axes, number, colors, annotations, dic_out): + for idx in iterator: + if any(xx in self.output_types for xx in ['front', 'multi']) and self.zz_pred[idx] > 0: + if self.webcam: + self._webcam_front(axes[0], colors, self.activities, annotations, dic_out) + else: + self._draw_front(axes[0], + self.dd_pred[idx], + idx, + number) + number['num'] += 1 + + + def _bird_loop(self, iterator, axes, colors, number): + for idx in iterator: + if any(xx in self.output_types for xx in ['bird', 'multi']) and self.zz_pred[idx] > 0: + draw_orientation(axes[1], self.xz_centers, [], self.angles, colors, mode='bird') + # Draw ground truth and uncertainty + self._draw_uncertainty(axes, idx) + + # Draw bird eye view text + if number['flag']: + self._draw_text_bird(axes, idx, number['num']) + number['num'] += 1 + + + def draw(self, figures, axes, image, dic_out=None, annotations=None): + + colors = ['deepskyblue' for _ in self.uv_heads] + if 'social_distance' in 
self.activities: + colors = social_distance_colors(colors, dic_out) # whether to include instances that don't match the ground-truth iterator = range(len(self.zz_pred)) if self.show_all else range(len(self.zz_gt)) @@ -176,27 +242,16 @@ class Printer: number = dict(flag=False, num=97) if any(xx in self.output_types for xx in ['front', 'multi']): number['flag'] = True # add numbers - self.mpl_im0.set_data(image) - for idx in iterator: - if any(xx in self.output_types for xx in ['front', 'multi']) and self.zz_pred[idx] > 0: - self._draw_front(axes[0], - self.dd_pred[idx], - idx, - number) - number['num'] += 1 + # Remove image if social distance is activated + if 'social_distance' not in self.activities: + self.mpl_im0.set_data(image) + + self._front_loop(iterator, axes, number, colors, annotations, dic_out) # Draw the bird figure number['num'] = 97 - for idx in iterator: - if any(xx in self.output_types for xx in ['bird', 'multi']) and self.zz_pred[idx] > 0: + self._bird_loop(iterator, axes, colors, number) - # Draw ground truth and uncertainty - self._draw_uncertainty(axes, idx) - - # Draw bird eye view text - if number['flag']: - self._draw_text_bird(axes, idx, number['num']) - number['num'] += 1 self._draw_legend(axes) # Draw, save or/and show the figures @@ -206,7 +261,9 @@ class Printer: fig.savefig(self.output_path + self.extensions[idx], bbox_inches='tight', dpi=self.attr['dpi']) if self.show: fig.show() - plt.close(fig) + if self.plt_close: + plt.close(fig) + def _draw_front(self, ax, z, idx, number): @@ -230,23 +287,24 @@ class Printer: x_t = x0 - 1.5 y_t = y1 + self.attr['y_box_margin'] if y_t < (self.height-10): - ax.annotate( - text, - (x_t, y_t), - fontsize=self.attr['fontsize_d'], - weight='bold', - xytext=(5.0, 5.0), - textcoords='offset points', - color='white', - bbox=bbox_config, - ) - if number['flag']: - ax.text(x0 - 17, - y1 + 14, - chr(number['num']), - fontsize=self.attr['fontsize_num'], - color=self.attr[self.modes[idx]]['numcolor'], - weight='bold') + if not self.hide_distance: + ax.annotate( + text, + (x_t, y_t), + fontsize=self.attr['fontsize_d'], + weight='bold', + xytext=(5.0, 5.0), + textcoords='offset points', + color='white', + bbox=bbox_config, + ) + if number['flag']: + ax.text(x0 - 17, + y1 + 14, + chr(number['num']), + fontsize=self.attr['fontsize_num'], + color=self.attr[self.modes[idx]]['numcolor'], + weight='bold') def _draw_text_bird(self, axes, idx, num): """Plot the number in the bird eye view map""" @@ -360,20 +418,23 @@ class Printer: ax.set_axis_off() ax.set_xlim(0, self.width) ax.set_ylim(self.height, 0) - self.mpl_im0 = ax.imshow(self.im) + if not self.activities or 'social_distance' not in self.activities: + self.mpl_im0 = ax.imshow(self.im) ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) else: uv_max = [0., float(self.height)] xyz_max = pixel_to_camera(uv_max, self.kk, self.z_max) - x_max = abs(xyz_max[0]) # shortcut to avoid oval circles in case of different kk + x_max = abs(xyz_max[0]) # shortcut to avoid oval circles in case of different kk corr = round(float(x_max / 3)) - ax.plot([0, x_max], [0, self.z_max], 'k--') - ax.plot([0, -x_max], [0, self.z_max], 'k--') + ax.plot([0, x_max], [0, self.z_max], 'w--') + ax.plot([0, -x_max], [0, self.z_max], 'w--') ax.set_xlim(-x_max + corr, x_max - corr) ax.set_ylim(0, self.z_max + 1) ax.set_xlabel("X [m]") + ax.set_box_aspect(.8) + plt.xlim((-x_max, x_max)) plt.xticks(fontsize=self.attr['fontsize_ax']) plt.yticks(fontsize=self.attr['fontsize_ax']) return ax diff --git 
a/monoloco/visuals/webcam.py b/monoloco/visuals/webcam.py new file mode 100644 index 0000000..5dc3e84 --- /dev/null +++ b/monoloco/visuals/webcam.py @@ -0,0 +1,198 @@ +# pylint: disable=W0212 +""" +Webcam demo application + +Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/webcam.py + +""" + +import time +import logging + +import torch +import matplotlib.pyplot as plt +from PIL import Image +try: + import cv2 +except ImportError: + cv2 = None + +from openpifpaf import decoder, network, visualizer, show, logger +import openpifpaf.datasets as datasets +from openpifpaf.predict import processor_factory, preprocess_factory + +from ..visuals import Printer +from ..network import Loco +from ..network.process import preprocess_pifpaf, factory_for_gt +from ..predict import download_checkpoints + +LOG = logging.getLogger(__name__) + +def factory_from_args(args): + + # Model + dic_models = download_checkpoints(args) + args.checkpoint = dic_models['keypoints'] + + logger.configure(args, LOG) # logger first + + assert len(args.output_types) == 1 and 'json' not in args.output_types + + # Devices + args.device = torch.device('cpu') + args.pin_memory = False + if torch.cuda.is_available(): + args.device = torch.device('cuda') + args.pin_memory = True + LOG.debug('neural network device: %s', args.device) + + # Add visualization defaults + args.figure_width = 10 + args.dpi_factor = 1.0 + + args.z_max = 10 + args.show_all = True + args.no_save = True + args.batch_size = 1 + + if args.long_edge is None: + args.long_edge = 144 + # Make default pifpaf argument + args.force_complete_pose = True + LOG.info("Force complete pose is active") + + # Configure + decoder.configure(args) + network.Factory.configure(args) + show.configure(args) + visualizer.configure(args) + + return args, dic_models + + +def webcam(args): + + assert args.mode in 'mono' + + args, dic_models = factory_from_args(args) + + # Load Models + net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device, + n_dropout=args.n_dropout, p_dropout=args.dropout) + + processor, pifpaf_model = processor_factory(args) + preprocess = preprocess_factory(args) + + # Start recording + cam = cv2.VideoCapture(args.camera) + visualizer_mono = None + + while True: + start = time.time() + ret, frame = cam.read() + scale = (args.long_edge)/frame.shape[0] + image = cv2.resize(frame, None, fx=scale, fy=scale) + height, width, _ = image.shape + print('resized image size: {}'.format(image.shape)) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(image) + + data = datasets.PilImageList( + [pil_image], preprocess=preprocess) + + data_loader = torch.utils.data.DataLoader( + data, batch_size=1, shuffle=False, + pin_memory=False, collate_fn=datasets.collate_images_anns_meta) + + for (image_tensors_batch, _, meta_batch) in data_loader: + pred_batch = processor.batch( + pifpaf_model, image_tensors_batch, device=args.device) + + for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)): + pred = [ann.inverse_transform(meta) for ann in pred] + + if idx == 0: + pifpaf_outs = { + 'pred': pred, + 'left': [ann.json_data() for ann in pred], + 'image': image} + + if not ret: + break + key = cv2.waitKey(1) + if key % 256 == 27: + # ESC pressed + print("Escape hit, closing...") + break + + kk, dic_gt = factory_for_gt(pil_image.size, focal_length=args.focal) + boxes, keypoints = preprocess_pifpaf( + pifpaf_outs['left'], (width, height)) + + dic_out = net.forward(keypoints, kk) + dic_out = 
net.post_process(dic_out, boxes, keypoints, kk, dic_gt) + + if 'social_distance' in args.activities: + dic_out = net.social_distance(dic_out, args) + if 'raise_hand' in args.activities: + dic_out = net.raising_hand(dic_out, keypoints) + if visualizer_mono is None: # it is, at the beginning + visualizer_mono = Visualizer(kk, args)(pil_image) # create it with the first image + visualizer_mono.send(None) + + print(dic_out) + visualizer_mono.send((pil_image, dic_out, pifpaf_outs)) + + end = time.time() + print("run-time: {:.2f} ms".format((end-start)*1000)) + + cam.release() + + cv2.destroyAllWindows() + + +class Visualizer: + def __init__(self, kk, args): + self.kk = kk + self.args = args + + def __call__(self, first_image, fig_width=1.0, **kwargs): + if 'figsize' not in kwargs: + kwargs['figsize'] = (fig_width, fig_width * + first_image.size[0] / first_image.size[1]) + + printer = Printer(first_image, output_path="", + kk=self.kk, args=self.args) + + figures, axes = printer.factory_axes(None) + + for fig in figures: + fig.show() + + while True: + image, dic_out, pifpaf_outs = yield + + # Clears previous annotations between frames + axes[0].patches = [] + axes[0].lines = [] + axes[0].texts = [] + if len(axes) > 1: + axes[1].patches = [] + axes[1].lines = [axes[1].lines[0], axes[1].lines[1]] + axes[1].texts = [] + + if dic_out and dic_out['dds_pred']: + printer._process_results(dic_out) + printer.draw(figures, axes, image, dic_out, pifpaf_outs['left']) + mypause(0.01) + + +def mypause(interval): + manager = plt._pylab_helpers.Gcf.get_active() + if manager is not None: + canvas = manager.canvas + if canvas.figure.stale: + canvas.draw_idle() + canvas.start_event_loop(interval) + else: + time.sleep(interval) diff --git a/tests/test_train_mono.py b/tests/test_train_mono.py index 964ea3d..af08a10 100644 --- a/tests/test_train_mono.py +++ b/tests/test_train_mono.py @@ -33,7 +33,7 @@ PREDICT_COMMAND_SOCIAL_DISTANCE = [ 'python3', '-m', 'monoloco.run', 'predict', 'docs/frame0032.jpg', - '--social_distance', + '--activities', 'social_distance', '--output_types', 'front', 'bird', '--decoder-workers=0' # for windows' ]
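A note on the streaming loop in `monoloco/visuals/webcam.py`: the `Visualizer` is consumed as a generator coroutine, primed once with `send(None)` and then fed one frame at a time with `send(...)`. A minimal, self-contained sketch of that pattern (the names here are illustrative, not part of the codebase):

```python
import matplotlib.pyplot as plt


def visualizer():
    """Generator-based display loop: prime with send(None), then send frames."""
    fig, ax = plt.subplots()
    while True:
        frame = yield  # suspend here until the caller sends the next frame
        ax.clear()
        ax.imshow(frame)
        fig.canvas.draw_idle()
        plt.pause(0.01)  # let the GUI event loop process the redraw


viz = visualizer()
viz.send(None)  # run the generator up to its first `yield`
# for frame in camera_frames():  # hypothetical frame source, e.g. cv2.VideoCapture reads
#     viz.send(frame)            # push each frame; the loop body redraws it
```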