From 256102021a64be6515b451c4714a0b3de7a484f7 Mon Sep 17 00:00:00 2001 From: charlesbvll Date: Sun, 28 Mar 2021 15:10:38 +0200 Subject: [PATCH] Working webcam and risen hand detection --- monoloco/activity.py | 14 +++---- monoloco/predict.py | 8 ++-- monoloco/run.py | 16 +++----- monoloco/visuals/pifpaf_show.py | 12 ++++-- monoloco/visuals/printer.py | 3 +- monoloco/visuals/webcam.py | 66 ++++++++++++++++----------------- 6 files changed, 59 insertions(+), 60 deletions(-) diff --git a/monoloco/activity.py b/monoloco/activity.py index 5b6e81f..0065936 100644 --- a/monoloco/activity.py +++ b/monoloco/activity.py @@ -72,18 +72,17 @@ def is_raising_hand(keypoint): l_hand = 9 r_shoulder = 6 r_hand = 10 - h_offset = 10 + l_ear = 3 + r_ear = 4 + h_offset = 20 - if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and - keypoint[1][r_hand] < keypoint[1][r_shoulder]) and - (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and - keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])): + if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]: return 'both' - if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]): + if keypoint[1][l_hand] < keypoint[1][l_shoulder]: return 'left' - if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]: + if keypoint[1][r_hand] < keypoint[1][r_shoulder]: return 'right' return 'none' @@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out): r_h = 'none' if 'raise_hand' in args.activities: r_h = dic_out['raising_hand'] + print("RAISE_HAND :", r_h) with image_canvas(image_t, output_path + '.front.png', diff --git a/monoloco/predict.py b/monoloco/predict.py index 6f940e7..38de93b 100644 --- a/monoloco/predict.py +++ b/monoloco/predict.py @@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger from .visuals.printer import Printer from .network import Loco from .network.process import factory_for_gt, preprocess_pifpaf -from .activity import show_activities, show_social +from .activity import show_activities LOG = logging.getLogger(__name__) @@ -63,7 +63,7 @@ def download_checkpoints(args): assert not args.social_distance, "Social distance not supported in stereo modality" path = MONSTEREO_MODEL name = 'monstereo-201202-1212.pkl' - elif args.social_distance: + elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam: path = MONOLOCO_MODEL_NU name = 'monoloco_pp-201207-1350.pkl' else: @@ -204,8 +204,10 @@ def predict(args): LOG.info("Prediction with MonoLoco++") dic_out = net.forward(keypoints, kk) dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt) - if args.social_distance: + if args.social_distance or (args.activities and 'social_distance' in args.activities): dic_out = net.social_distance(dic_out, args) + if args.activities and 'raise_hand' in args.activities: + dic_out = net.raising_hand(dic_out, keypoints) else: LOG.info("Prediction with MonStereo") diff --git a/monoloco/run.py b/monoloco/run.py index 6c8210c..713721f 100644 --- a/monoloco/run.py +++ b/monoloco/run.py @@ -47,18 +47,18 @@ def cli(): # Monoloco predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand') - predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp') - predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True) - predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512) - predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization', - default='data/arrays/names-kitti-200615-1022.json') - predict_parser.add_argument('--transform', help='transformation for the pose', default='None') + predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono') + predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load') + predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode') + predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization') + #default='data/arrays/names-kitti-200615-1022.json') predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100) predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0) predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2) predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true') predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true') predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image') + # Social distancing and social interactions predict_parser.add_argument('--social_distance', help='social', action='store_true') predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25) @@ -128,10 +128,6 @@ def main(): args = cli() if args.command == 'predict': if args.webcam: - if 'json'in args.output_types: - args.output_types = 'multi' - if args.z_max == 100: - args.z_max = 10 from .visuals.webcam import webcam webcam(args) else: diff --git a/monoloco/visuals/pifpaf_show.py b/monoloco/visuals/pifpaf_show.py index b0b309f..5c10ae5 100644 --- a/monoloco/visuals/pifpaf_show.py +++ b/monoloco/visuals/pifpaf_show.py @@ -200,11 +200,15 @@ class KeypointPainter(object): if isinstance(color, (int, np.integer)): color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20) - self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i]) + if raise_hand is not 'none': + self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i]) + else: + self._draw_skeleton(ax, x, y, v, color=color) score = scores[i] if scores is not None else None - z_str = str(score).split(sep='.') - text = z_str[0] + '.' + z_str[1][0] - self._draw_text(ax, x-2, y, v, text, color) + if score is not None: + z_str = str(score).split(sep='.') + text = z_str[0] + '.' + z_str[1][0] + self._draw_text(ax, x-2, y, v, text, color) if self.show_box: score = scores[i] if scores is not None else None self._draw_box(ax, x, y, v, color, score) diff --git a/monoloco/visuals/printer.py b/monoloco/visuals/printer.py index 7e52843..0d82850 100644 --- a/monoloco/visuals/printer.py +++ b/monoloco/visuals/printer.py @@ -139,6 +139,7 @@ class Printer: fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]}, figsize=(fig_width, fig_height)) + ax1.set_aspect(fig_ar_1) fig.set_tight_layout(True) fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02) @@ -194,7 +195,7 @@ class Printer: def social_distance_bird(self, axis, colors): draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird') - def draw(self, figures, axes, image, dic_out, annotations): + def draw(self, figures, axes, image, dic_out=None, annotations=None): if self.args.activities: colors = ['deepskyblue' for _ in self.uv_heads] diff --git a/monoloco/visuals/webcam.py b/monoloco/visuals/webcam.py index bfa1cc3..e7c46a8 100644 --- a/monoloco/visuals/webcam.py +++ b/monoloco/visuals/webcam.py @@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/ import time import os +import logging import torch import matplotlib.pyplot as plt from PIL import Image import cv2 -from openpifpaf import decoder, network, visualizer, show +from openpifpaf import decoder, network, visualizer, show, logger import openpifpaf.datasets as datasets from openpifpaf.predict import processor_factory, preprocess_factory from ..visuals import Printer from ..network import Loco from ..network.process import preprocess_pifpaf, factory_for_gt +from ..predict import download_checkpoints -OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl' - +LOG = logging.getLogger(__name__) def factory_from_args(args): # Model - if not args.checkpoint: - if os.path.exists(OPENPIFPAF_PATH): - args.checkpoint = OPENPIFPAF_PATH - else: - args.checkpoint = 'shufflenetv2k30' + dic_models = download_checkpoints(args) + args.checkpoint = dic_models['keypoints'] + + logger.configure(args, LOG) # logger first # Devices args.device = torch.device('cpu') @@ -40,18 +40,20 @@ def factory_from_args(args): if torch.cuda.is_available(): args.device = torch.device('cuda') args.pin_memory = True + LOG.debug('neural network device: %s', args.device) # Add visualization defaults args.figure_width = 10 args.dpi_factor = 1.0 - if args.net == 'monstereo': - args.batch_size = 2 - else: - args.batch_size = 1 + args.z_max = 10 + args.show_all = True + args.no_save = True + args.batch_size = 1 # Make default pifpaf argument args.force_complete_pose = True + LOG.info("Force complete pose is active") # Configure decoder.configure(args) @@ -59,22 +61,24 @@ def factory_from_args(args): show.configure(args) visualizer.configure(args) - return args + return args, dic_models def webcam(args): + + assert args.mode in ('mono') + args, dic_models = factory_from_args(args) - args = factory_from_args(args) # Load Models - net = Loco(model=args.model, net=args.net, device=args.device, + net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout) - processor, model = processor_factory(args) + processor, pifpaf_model = processor_factory(args) preprocess = preprocess_factory(args) # Start recording cam = cv2.VideoCapture(0) - visualizer_monstereo = None + visualizer_mono = None while True: start = time.time() @@ -86,7 +90,7 @@ def webcam(args): pil_image = Image.fromarray(image) data = datasets.PilImageList( - make_list(pil_image), preprocess=preprocess) + [pil_image], preprocess=preprocess) data_loader = torch.utils.data.DataLoader( data, batch_size=1, shuffle=False, @@ -94,7 +98,7 @@ def webcam(args): for (image_tensors_batch, _, meta_batch) in data_loader: pred_batch = processor.batch( - model, image_tensors_batch, device=args.device) + pifpaf_model, image_tensors_batch, device=args.device) for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)): pred = [ann.inverse_transform(meta) for ann in pred] @@ -104,8 +108,6 @@ def webcam(args): 'pred': pred, 'left': [ann.json_data() for ann in pred], 'image': image} - else: - pifpaf_outs['right'] = [ann.json_data() for ann in pred] if not ret: break @@ -114,10 +116,9 @@ def webcam(args): # ESC pressed print("Escape hit, closing...") break + intrinsic_size = [xx * 1.3 for xx in pil_image.size] - kk, dic_gt = factory_for_gt(intrinsic_size, - focal_length=args.focal, - path_gt=args.path_gt) # better intrinsics for mac camera + kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal) # better intrinsics for mac camera boxes, keypoints = preprocess_pifpaf( pifpaf_outs['left'], (width, height)) @@ -129,13 +130,12 @@ def webcam(args): dic_out = net.social_distance(dic_out, args) if 'raise_hand' in args.activities: dic_out = net.raising_hand(dic_out, keypoints) - if visualizer_monstereo is None: # it is, at the beginning - visualizer_monstereo = VisualizerMonstereo(kk, - args)(pil_image) # create it with the first image - visualizer_monstereo.send(None) + if visualizer_mono is None: # it is, at the beginning + visualizer_mono = Visualizer(kk, args)(pil_image) # create it with the first image + visualizer_mono.send(None) print(dic_out) - visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs)) + visualizer_mono.send((pil_image, dic_out, pifpaf_outs)) end = time.time() print("run-time: {:.2f} ms".format((end-start)*1000)) @@ -145,7 +145,7 @@ def webcam(args): cv2.destroyAllWindows() -class VisualizerMonstereo: +class Visualizer: def __init__(self, kk, args): self.kk = kk self.args = args @@ -189,8 +189,4 @@ def mypause(interval): canvas.draw_idle() canvas.start_event_loop(interval) else: - time.sleep(interval) - - -def make_list(*args): - return list(args) + time.sleep(interval) \ No newline at end of file