From 256102021a64be6515b451c4714a0b3de7a484f7 Mon Sep 17 00:00:00 2001
From: charlesbvll <charles.beauville@epfl.ch>
Date: Sun, 28 Mar 2021 15:10:38 +0200
Subject: [PATCH] Working webcam and raised hand detection

---
 monoloco/activity.py            | 14 +++----
 monoloco/predict.py             |  8 ++--
 monoloco/run.py                 | 16 +++-----
 monoloco/visuals/pifpaf_show.py | 12 ++++--
 monoloco/visuals/printer.py     |  3 +-
 monoloco/visuals/webcam.py      | 66 ++++++++++++++++-----------------
 6 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/monoloco/activity.py b/monoloco/activity.py
index 5b6e81f..0065936 100644
--- a/monoloco/activity.py
+++ b/monoloco/activity.py
@@ -72,18 +72,17 @@ def is_raising_hand(keypoint):
     l_hand = 9
     r_shoulder = 6
     r_hand = 10
-    h_offset = 10
+    l_ear = 3
+    r_ear = 4
+    h_offset = 20
 
-    if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and
-         keypoint[1][r_hand] < keypoint[1][r_shoulder]) and
-            (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and
-             keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]: 
         return 'both'
 
-    if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder]: 
         return 'left'
 
-    if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]:
+    if keypoint[1][r_hand] < keypoint[1][r_shoulder]: 
         return 'right'
 
     return 'none'
@@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out):
         r_h = 'none'
         if 'raise_hand' in args.activities:
             r_h = dic_out['raising_hand']
+        print("RAISE_HAND :", r_h)
 
         with image_canvas(image_t,
                           output_path + '.front.png',
diff --git a/monoloco/predict.py b/monoloco/predict.py
index 6f940e7..38de93b 100644
--- a/monoloco/predict.py
+++ b/monoloco/predict.py
@@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger
 from .visuals.printer import Printer
 from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
-from .activity import show_activities, show_social
+from .activity import show_activities
 
 LOG = logging.getLogger(__name__)
 
@@ -63,7 +63,7 @@ def download_checkpoints(args):
         assert not args.social_distance, "Social distance not supported in stereo modality"
         path = MONSTEREO_MODEL
         name = 'monstereo-201202-1212.pkl'
-    elif args.social_distance:
+    elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam:
         path = MONOLOCO_MODEL_NU
         name = 'monoloco_pp-201207-1350.pkl'
     else:
@@ -204,8 +204,10 @@ def predict(args):
                 LOG.info("Prediction with MonoLoco++")
                 dic_out = net.forward(keypoints, kk)
                 dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-                if args.social_distance:
+                if args.social_distance or (args.activities and 'social_distance' in args.activities):
                     dic_out = net.social_distance(dic_out, args)
+                if args.activities and 'raise_hand' in args.activities:
+                    dic_out = net.raising_hand(dic_out, keypoints)
 
             else:
                 LOG.info("Prediction with MonStereo")
diff --git a/monoloco/run.py b/monoloco/run.py
index 6c8210c..713721f 100644
--- a/monoloco/run.py
+++ b/monoloco/run.py
@@ -47,18 +47,18 @@ def cli():
 
     # Monoloco
     predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
-    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp')
-    predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
-    predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
-    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
-                                default='data/arrays/names-kitti-200615-1022.json')
-    predict_parser.add_argument('--transform', help='transformation for the pose', default='None')
+    predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
+    predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
+    predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
+    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
+                                #default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
     predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
     predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')
+
     # Social distancing and social interactions
     predict_parser.add_argument('--social_distance', help='social', action='store_true')
     predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
@@ -128,10 +128,6 @@ def main():
     args = cli()
     if args.command == 'predict':
         if args.webcam:
-            if 'json'in args.output_types:
-                args.output_types = 'multi'
-            if args.z_max == 100:
-                args.z_max = 10
             from .visuals.webcam import webcam
             webcam(args)
         else:
diff --git a/monoloco/visuals/pifpaf_show.py b/monoloco/visuals/pifpaf_show.py
index b0b309f..5c10ae5 100644
--- a/monoloco/visuals/pifpaf_show.py
+++ b/monoloco/visuals/pifpaf_show.py
@@ -200,11 +200,15 @@ class KeypointPainter(object):
             if isinstance(color, (int, np.integer)):
                 color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
 
-            self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            if not (isinstance(raise_hand, str) and raise_hand == 'none'):  # compare by value, not identity
+                self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            else:
+                self._draw_skeleton(ax, x, y, v, color=color)
             score = scores[i] if scores is not None else None
-            z_str = str(score).split(sep='.')
-            text = z_str[0] + '.' + z_str[1][0]
-            self._draw_text(ax, x-2, y, v, text, color)
+            if score is not None:
+                z_str = str(score).split(sep='.')
+                text = z_str[0] + '.' + z_str[1][0]
+                self._draw_text(ax, x-2, y, v, text, color)
             if self.show_box:
                 score = scores[i] if scores is not None else None
                 self._draw_box(ax, x, y, v, color, score)
diff --git a/monoloco/visuals/printer.py b/monoloco/visuals/printer.py
index 7e52843..0d82850 100644
--- a/monoloco/visuals/printer.py
+++ b/monoloco/visuals/printer.py
@@ -139,6 +139,7 @@ class Printer:
 
             fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
                                            figsize=(fig_width, fig_height))
+           
             ax1.set_aspect(fig_ar_1)
             fig.set_tight_layout(True)
             fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -194,7 +195,7 @@ class Printer:
     def social_distance_bird(self, axis, colors):
         draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')
 
-    def draw(self, figures, axes, image, dic_out, annotations):
+    def draw(self, figures, axes, image, dic_out=None, annotations=None):
 
         if self.args.activities:
             colors = ['deepskyblue' for _ in self.uv_heads]
diff --git a/monoloco/visuals/webcam.py b/monoloco/visuals/webcam.py
index bfa1cc3..e7c46a8 100644
--- a/monoloco/visuals/webcam.py
+++ b/monoloco/visuals/webcam.py
@@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/
 
 import time
 import os
+import logging
 
 import torch
 import matplotlib.pyplot as plt
 from PIL import Image
 import cv2
 
-from openpifpaf import decoder, network, visualizer, show
+from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
 from openpifpaf.predict import processor_factory, preprocess_factory
 
 from ..visuals import Printer
 from ..network import Loco
 from ..network.process import preprocess_pifpaf, factory_for_gt
+from ..predict import download_checkpoints
 
-OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'
-
+LOG = logging.getLogger(__name__)
 
 def factory_from_args(args):
 
     # Model
-    if not args.checkpoint:
-        if os.path.exists(OPENPIFPAF_PATH):
-            args.checkpoint = OPENPIFPAF_PATH
-        else:
-            args.checkpoint = 'shufflenetv2k30'
+    dic_models = download_checkpoints(args)
+    args.checkpoint = dic_models['keypoints']
+
+    logger.configure(args, LOG)  # logger first
 
     # Devices
     args.device = torch.device('cpu')
@@ -40,18 +40,20 @@ def factory_from_args(args):
     if torch.cuda.is_available():
         args.device = torch.device('cuda')
         args.pin_memory = True
+    LOG.debug('neural network device: %s', args.device)
 
     # Add visualization defaults
     args.figure_width = 10
     args.dpi_factor = 1.0
 
-    if args.net == 'monstereo':
-        args.batch_size = 2
-    else:
-        args.batch_size = 1
+    args.z_max = 10
+    args.show_all = True
+    args.no_save = True
+    args.batch_size = 1
 
     # Make default pifpaf argument
     args.force_complete_pose = True
+    LOG.info("Force complete pose is active")
 
     # Configure
     decoder.configure(args)
@@ -59,22 +61,24 @@ def factory_from_args(args):
     show.configure(args)
     visualizer.configure(args)
 
-    return args
+    return args, dic_models
 
 
 def webcam(args):
+    
+    assert args.mode in ('mono',)  # one-element tuple: ('mono') is a plain str, so `in` would match substrings
+    args, dic_models = factory_from_args(args)
 
-    args = factory_from_args(args)
     # Load Models
-    net = Loco(model=args.model, net=args.net, device=args.device,
+    net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
                n_dropout=args.n_dropout, p_dropout=args.dropout)
 
-    processor, model = processor_factory(args)
+    processor, pifpaf_model = processor_factory(args)
     preprocess = preprocess_factory(args)
 
     # Start recording
     cam = cv2.VideoCapture(0)
-    visualizer_monstereo = None
+    visualizer_mono = None
 
     while True:
         start = time.time()
@@ -86,7 +90,7 @@ def webcam(args):
         pil_image = Image.fromarray(image)
 
         data = datasets.PilImageList(
-            make_list(pil_image), preprocess=preprocess)
+            [pil_image], preprocess=preprocess)
 
         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False,
@@ -94,7 +98,7 @@ def webcam(args):
 
         for (image_tensors_batch, _, meta_batch) in data_loader:
             pred_batch = processor.batch(
-                model, image_tensors_batch, device=args.device)
+                pifpaf_model, image_tensors_batch, device=args.device)
 
             for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
                 pred = [ann.inverse_transform(meta) for ann in pred]
@@ -104,8 +108,6 @@ def webcam(args):
                         'pred': pred,
                         'left': [ann.json_data() for ann in pred],
                         'image': image}
-                else:
-                    pifpaf_outs['right'] = [ann.json_data() for ann in pred]
 
         if not ret:
             break
@@ -114,10 +116,9 @@ def webcam(args):
             # ESC pressed
             print("Escape hit, closing...")
             break
+
         intrinsic_size = [xx * 1.3 for xx in pil_image.size]
-        kk, dic_gt = factory_for_gt(intrinsic_size,
-                                    focal_length=args.focal,
-                                    path_gt=args.path_gt)  # better intrinsics for mac camera
+        kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal)  # better intrinsics for mac camera
         boxes, keypoints = preprocess_pifpaf(
             pifpaf_outs['left'], (width, height))
 
@@ -129,13 +130,12 @@ def webcam(args):
                 dic_out = net.social_distance(dic_out, args)
             if 'raise_hand' in args.activities:
                 dic_out = net.raising_hand(dic_out, keypoints)
-        if visualizer_monstereo is None:  # it is, at the beginning
-            visualizer_monstereo = VisualizerMonstereo(kk,
-                                                       args)(pil_image)  # create it with the first image
-            visualizer_monstereo.send(None)
+        if visualizer_mono is None:  # it is, at the beginning
+            visualizer_mono = Visualizer(kk, args)(pil_image)  # create it with the first image
+            visualizer_mono.send(None)
 
         print(dic_out)
-        visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs))
+        visualizer_mono.send((pil_image, dic_out, pifpaf_outs))
 
         end = time.time()
         print("run-time: {:.2f} ms".format((end-start)*1000))
@@ -145,7 +145,7 @@ def webcam(args):
     cv2.destroyAllWindows()
 
 
-class VisualizerMonstereo:
+class Visualizer:
     def __init__(self, kk, args):
         self.kk = kk
         self.args = args
@@ -189,8 +189,4 @@ def mypause(interval):
             canvas.draw_idle()
         canvas.start_event_loop(interval)
     else:
-        time.sleep(interval)
-
-
-def make_list(*args):
-    return list(args)
+        time.sleep(interval)
\ No newline at end of file