Working webcam and raised-hand detection
parent ea63dd5781
commit 256102021a
@@ -72,18 +72,17 @@ def is_raising_hand(keypoint):
     l_hand = 9
     r_shoulder = 6
     r_hand = 10
-    h_offset = 10
+    l_ear = 3
+    r_ear = 4
+    h_offset = 20
 
-    if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and
-         keypoint[1][r_hand] < keypoint[1][r_shoulder]) and
-        (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and
-         keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'both'
 
-    if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder]:
         return 'left'
 
-    if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]:
+    if keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'right'
 
     return 'none'
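The new check drops the horizontal-offset tests and keeps only the vertical wrist-above-shoulder comparison. A condensed, self-contained sketch of that logic (sample values are made up; keypoint[0] holds x coordinates, keypoint[1] holds y coordinates indexed by COCO keypoint ids — l_shoulder is presumably 5, following the COCO ordering — and a smaller y means higher in the image):

    # Condensed restatement of the simplified check above, for illustration only.
    L_SHOULDER, R_SHOULDER, L_HAND, R_HAND = 5, 6, 9, 10

    def raising_hand_sketch(keypoint):
        left_up = keypoint[1][L_HAND] < keypoint[1][L_SHOULDER]
        right_up = keypoint[1][R_HAND] < keypoint[1][R_SHOULDER]
        if left_up and right_up:
            return 'both'
        if left_up:
            return 'left'
        if right_up:
            return 'right'
        return 'none'

    # Toy 17-keypoint pose: only the shoulder and wrist indices matter here.
    xs = [0.0] * 17
    ys = [5.0] * 17
    ys[L_SHOULDER] = ys[R_SHOULDER] = 3.0
    ys[L_HAND] = 1.0  # left wrist above the left shoulder
    assert raising_hand_sketch([xs, ys]) == 'left'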
@@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out):
     r_h = 'none'
     if 'raise_hand' in args.activities:
         r_h = dic_out['raising_hand']
+        print("RAISE_HAND :", r_h)
 
     with image_canvas(image_t,
                       output_path + '.front.png',
@@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger
 from .visuals.printer import Printer
 from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
-from .activity import show_activities, show_social
+from .activity import show_activities
 
 LOG = logging.getLogger(__name__)
 
@@ -63,7 +63,7 @@ def download_checkpoints(args):
         assert not args.social_distance, "Social distance not supported in stereo modality"
         path = MONSTEREO_MODEL
         name = 'monstereo-201202-1212.pkl'
-    elif args.social_distance:
+    elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam:
         path = MONOLOCO_MODEL_NU
         name = 'monoloco_pp-201207-1350.pkl'
     else:
@@ -204,8 +204,10 @@ def predict(args):
         LOG.info("Prediction with MonoLoco++")
         dic_out = net.forward(keypoints, kk)
         dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-        if args.social_distance:
+        if args.social_distance or (args.activities and 'social_distance' in args.activities):
             dic_out = net.social_distance(dic_out, args)
+        if args.activities and 'raise_hand' in args.activities:
+            dic_out = net.raising_hand(dic_out, keypoints)
 
     else:
         LOG.info("Prediction with MonStereo")
@@ -47,18 +47,18 @@ def cli():
 
     # Monoloco
+    predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
-    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp')
-    predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
     predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
-    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
-                                default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--transform', help='transformation for the pose', default='None')
+    predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
+    predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
+    predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
+    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
+                                # default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
+    predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
+    predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')
 
     # Social distancing and social interactions
     predict_parser.add_argument('--social_distance', help='social', action='store_true')
     predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
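Because --activities uses nargs='+', argparse returns a list when the flag is given and None when it is absent, which is why the prediction code above guards with `args.activities and 'raise_hand' in args.activities`. A minimal sketch of that behaviour (the parser here is a stand-in, not the repo's):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--activities', nargs='+',
                        help='Choose activities to show: social_distance, raise_hand')

    args = parser.parse_args(['--activities', 'raise_hand', 'social_distance'])
    assert args.activities == ['raise_hand', 'social_distance']

    args = parser.parse_args([])
    assert args.activities is None  # hence the truthiness check before `in`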
@@ -128,10 +128,6 @@ def main():
     args = cli()
     if args.command == 'predict':
         if args.webcam:
-            if 'json' in args.output_types:
-                args.output_types = 'multi'
-            if args.z_max == 100:
-                args.z_max = 10
             from .visuals.webcam import webcam
             webcam(args)
         else:
@@ -200,11 +200,15 @@ class KeypointPainter(object):
             if isinstance(color, (int, np.integer)):
                 color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
 
-            self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            if raise_hand != 'none':
+                self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            else:
+                self._draw_skeleton(ax, x, y, v, color=color)
             score = scores[i] if scores is not None else None
-            z_str = str(score).split(sep='.')
-            text = z_str[0] + '.' + z_str[1][0]
-            self._draw_text(ax, x-2, y, v, text, color)
+            if score is not None:
+                z_str = str(score).split(sep='.')
+                text = z_str[0] + '.' + z_str[1][0]
+                self._draw_text(ax, x-2, y, v, text, color)
             if self.show_box:
                 score = scores[i] if scores is not None else None
                 self._draw_box(ax, x, y, v, color, score)
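The added `if score is not None` guard matters because the label is built by string-splitting the score; with no score, `str(None)` has no fractional part to index. The truncation itself keeps one decimal digit:

    score = 7.4631
    z_str = str(score).split(sep='.')
    text = z_str[0] + '.' + z_str[1][0]  # keep only the first decimal digit
    assert text == '7.4'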
@@ -139,6 +139,7 @@ class Printer:
 
         fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
                                        figsize=(fig_width, fig_height))
 
         ax1.set_aspect(fig_ar_1)
+        fig.set_tight_layout(True)
         fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -194,7 +195,7 @@ class Printer:
     def social_distance_bird(self, axis, colors):
         draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')
 
-    def draw(self, figures, axes, image, dic_out, annotations):
+    def draw(self, figures, axes, image, dic_out=None, annotations=None):
 
         if self.args.activities:
             colors = ['deepskyblue' for _ in self.uv_heads]
@@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/
 
 import time
-import os
+import logging
 
 import torch
 import matplotlib.pyplot as plt
 from PIL import Image
 import cv2
 
-from openpifpaf import decoder, network, visualizer, show
+from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
 from openpifpaf.predict import processor_factory, preprocess_factory
 
 from ..visuals import Printer
 from ..network import Loco
 from ..network.process import preprocess_pifpaf, factory_for_gt
+from ..predict import download_checkpoints
 
-OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'
+LOG = logging.getLogger(__name__)
 
 
 def factory_from_args(args):
 
     # Model
     if not args.checkpoint:
-        if os.path.exists(OPENPIFPAF_PATH):
-            args.checkpoint = OPENPIFPAF_PATH
-        else:
-            args.checkpoint = 'shufflenetv2k30'
+        dic_models = download_checkpoints(args)
+        args.checkpoint = dic_models['keypoints']
+
+    logger.configure(args, LOG)  # logger first
 
     # Devices
     args.device = torch.device('cpu')
@@ -40,18 +40,20 @@ def factory_from_args(args):
     if torch.cuda.is_available():
         args.device = torch.device('cuda')
         args.pin_memory = True
+    LOG.debug('neural network device: %s', args.device)
 
     # Add visualization defaults
     args.figure_width = 10
     args.dpi_factor = 1.0
 
-    if args.net == 'monstereo':
-        args.batch_size = 2
-    else:
-        args.batch_size = 1
+    args.z_max = 10
+    args.show_all = True
+    args.no_save = True
+    args.batch_size = 1
 
     # Make default pifpaf argument
     args.force_complete_pose = True
+    LOG.info("Force complete pose is active")
 
     # Configure
     decoder.configure(args)
@@ -59,22 +61,24 @@ def factory_from_args(args):
     show.configure(args)
     visualizer.configure(args)
 
-    return args
+    return args, dic_models
 
 
 def webcam(args):
 
+    assert args.mode in ('mono',)
+    args, dic_models = factory_from_args(args)
 
-    args = factory_from_args(args)
     # Load Models
-    net = Loco(model=args.model, net=args.net, device=args.device,
+    net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
                n_dropout=args.n_dropout, p_dropout=args.dropout)
 
-    processor, model = processor_factory(args)
+    processor, pifpaf_model = processor_factory(args)
     preprocess = preprocess_factory(args)
 
     # Start recording
     cam = cv2.VideoCapture(0)
-    visualizer_monstereo = None
+    visualizer_mono = None
 
     while True:
         start = time.time()
@@ -86,7 +90,7 @@ def webcam(args):
         pil_image = Image.fromarray(image)
 
         data = datasets.PilImageList(
-            make_list(pil_image), preprocess=preprocess)
+            [pil_image], preprocess=preprocess)
 
         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False,
@@ -94,7 +98,7 @@ def webcam(args):
         for (image_tensors_batch, _, meta_batch) in data_loader:
             pred_batch = processor.batch(
-                model, image_tensors_batch, device=args.device)
+                pifpaf_model, image_tensors_batch, device=args.device)
 
             for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
                 pred = [ann.inverse_transform(meta) for ann in pred]
@@ -104,8 +108,6 @@ def webcam(args):
                         'pred': pred,
                         'left': [ann.json_data() for ann in pred],
                         'image': image}
-                else:
-                    pifpaf_outs['right'] = [ann.json_data() for ann in pred]
 
         if not ret:
             break
@@ -114,10 +116,9 @@ def webcam(args):
             # ESC pressed
             print("Escape hit, closing...")
             break
 
         intrinsic_size = [xx * 1.3 for xx in pil_image.size]
-        kk, dic_gt = factory_for_gt(intrinsic_size,
-                                    focal_length=args.focal,
-                                    path_gt=args.path_gt)  # better intrinsics for mac camera
+        kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal)  # better intrinsics for mac camera
         boxes, keypoints = preprocess_pifpaf(
             pifpaf_outs['left'], (width, height))
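The intrinsics are rebuilt each frame from a scaled image size and a focal-length argument. As a rough idea of what such a factory can produce — a generic pinhole matrix, not necessarily what factory_for_gt actually computes — consider:

    # Generic pinhole sketch (an assumption, not monoloco's formula):
    # focal length in pixels on the diagonal, principal point at the centre.
    def simple_intrinsics(width, height, focal_px):
        return [[focal_px, 0.0, width / 2.0],
                [0.0, focal_px, height / 2.0],
                [0.0, 0.0, 1.0]]

    kk = simple_intrinsics(640 * 1.3, 480 * 1.3, 600.0)  # made-up numbers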
@@ -129,13 +130,12 @@ def webcam(args):
             dic_out = net.social_distance(dic_out, args)
         if 'raise_hand' in args.activities:
             dic_out = net.raising_hand(dic_out, keypoints)
-        if visualizer_monstereo is None:  # it is, at the beginning
-            visualizer_monstereo = VisualizerMonstereo(kk,
-                                                       args)(pil_image)  # create it with the first image
-            visualizer_monstereo.send(None)
+        if visualizer_mono is None:  # it is, at the beginning
+            visualizer_mono = Visualizer(kk, args)(pil_image)  # create it with the first image
+            visualizer_mono.send(None)
 
         print(dic_out)
-        visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs))
+        visualizer_mono.send((pil_image, dic_out, pifpaf_outs))
 
         end = time.time()
         print("run-time: {:.2f} ms".format((end-start)*1000))
@@ -145,7 +145,7 @@ def webcam(args):
     cv2.destroyAllWindows()
 
 
-class VisualizerMonstereo:
+class Visualizer:
     def __init__(self, kk, args):
         self.kk = kk
         self.args = args
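Visualizer is used as a coroutine: calling the instance returns a generator, `send(None)` primes it, and each loop iteration pushes a frame with `send((pil_image, dic_out, pifpaf_outs))`. A stripped-down sketch of the same pattern (class name and the drawing body are placeholders):

    class ToyVisualizer:
        def __init__(self, kk, args):
            self.kk = kk
            self.args = args

        def __call__(self, first_image):
            # Because of the `yield`, calling the instance returns a generator.
            while True:
                payload = yield
                if payload is None:
                    break
                image, dic_out, pifpaf_outs = payload
                print('drawing frame with', len(pifpaf_outs['left']), 'poses')

    viz = ToyVisualizer(kk=None, args=None)('first frame')
    viz.send(None)                           # prime the generator
    viz.send(('frame', {}, {'left': []}))    # one webcam iteration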
@@ -189,8 +189,4 @@ def mypause(interval):
         canvas.draw_idle()
         canvas.start_event_loop(interval)
     else:
-        time.sleep(interval)
-
-
-def make_list(*args):
-    return list(args)
+        time.sleep(interval)