Working webcam and raised-hand detection

charlesbvll 2021-03-28 15:10:38 +02:00
parent ea63dd5781
commit 256102021a
6 changed files with 59 additions and 60 deletions

View File

@@ -72,18 +72,17 @@ def is_raising_hand(keypoint):
     l_hand = 9
     r_shoulder = 6
     r_hand = 10
-    h_offset = 10
-    if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and
-         keypoint[1][r_hand] < keypoint[1][r_shoulder]) and
-        (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and
-         keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])):
+    l_ear = 3
+    r_ear = 4
+    h_offset = 20
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'both'
-    if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder]:
         return 'left'
-    if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]:
+    if keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'right'
     return 'none'
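
The new branch reduces the test to a vertical comparison: a hand counts as raised when its wrist keypoint sits above the matching shoulder (a smaller y value, since image coordinates grow downward); the horizontal offset conditions are gone, and the freshly added l_ear, r_ear and h_offset constants are not yet consulted by the simplified conditions. A minimal standalone sketch of the check, assuming COCO keypoint ordering (5/6 for shoulders, 9/10 for wrists) and a keypoint array laid out as [x_row, y_row, confidence_row]; the names are illustrative, not the repository's API:

    def raised_hand_side(keypoint):
        l_shoulder, r_shoulder = 5, 6
        l_hand, r_hand = 9, 10
        # Image y grows downward, so "above" means a smaller y value.
        left_up = keypoint[1][l_hand] < keypoint[1][l_shoulder]
        right_up = keypoint[1][r_hand] < keypoint[1][r_shoulder]
        if left_up and right_up:
            return 'both'
        if left_up:
            return 'left'
        if right_up:
            return 'right'
        return 'none'

    print(raised_hand_side([[0, 0, 0, 0, 0, 10, 20, 0, 0, 11, 21],
                            [0, 0, 0, 0, 0, 50, 50, 0, 0, 30, 70]]))  # -> 'left'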
@@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out):
     r_h = 'none'
     if 'raise_hand' in args.activities:
         r_h = dic_out['raising_hand']
+    print("RAISE_HAND :", r_h)
     with image_canvas(image_t,
                       output_path + '.front.png',

View File

@@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger
 from .visuals.printer import Printer
 from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
-from .activity import show_activities, show_social
+from .activity import show_activities

 LOG = logging.getLogger(__name__)
@@ -63,7 +63,7 @@ def download_checkpoints(args):
         assert not args.social_distance, "Social distance not supported in stereo modality"
         path = MONSTEREO_MODEL
         name = 'monstereo-201202-1212.pkl'
-    elif args.social_distance:
+    elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam:
         path = MONOLOCO_MODEL_NU
         name = 'monoloco_pp-201207-1350.pkl'
     else:
@@ -204,8 +204,10 @@ def predict(args):
         LOG.info("Prediction with MonoLoco++")
         dic_out = net.forward(keypoints, kk)
         dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-        if args.social_distance:
+        if args.social_distance or (args.activities and 'social_distance' in args.activities):
             dic_out = net.social_distance(dic_out, args)
+        if args.activities and 'raise_hand' in args.activities:
+            dic_out = net.raising_hand(dic_out, keypoints)
     else:
         LOG.info("Prediction with MonStereo")

View File

@@ -47,18 +47,18 @@ def cli():
     # Monoloco
     predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
-    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp')
-    predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
-    predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
-    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
-                                default='data/arrays/names-kitti-200615-1022.json')
-    predict_parser.add_argument('--transform', help='transformation for the pose', default='None')
+    predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
+    predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
+    predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
+    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
+    # default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
     predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
     predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')

     # Social distancing and social interactions
     predict_parser.add_argument('--social_distance', help='social', action='store_true')
     predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
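
To see how the reworked flags compose: --mode becomes the primary selector with a 'mono' default, --model is no longer required (the webcam path can download checkpoints), and --net survives only for older MonoLoco models. A standalone argparse sketch mirroring the lines above, for illustration only:

    import argparse

    parser = argparse.ArgumentParser('predict')
    parser.add_argument('--activities', nargs='+',
                        help='Choose activities to show: social_distance, raise_hand')
    parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
    parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
    parser.add_argument('--webcam', help='monstereo streaming', action='store_true')

    args = parser.parse_args(['--webcam', '--activities', 'raise_hand'])
    assert args.mode == 'mono' and args.model is None  # defaults apply; no --net needed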
@@ -128,10 +128,6 @@ def main():
     args = cli()
     if args.command == 'predict':
         if args.webcam:
-            if 'json' in args.output_types:
-                args.output_types = 'multi'
-            if args.z_max == 100:
-                args.z_max = 10
             from .visuals.webcam import webcam
             webcam(args)
         else:

View File

@@ -200,11 +200,15 @@ class KeypointPainter(object):
             if isinstance(color, (int, np.integer)):
                 color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
-            self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            if raise_hand is not 'none':
+                self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            else:
+                self._draw_skeleton(ax, x, y, v, color=color)

             score = scores[i] if scores is not None else None
-            z_str = str(score).split(sep='.')
-            text = z_str[0] + '.' + z_str[1][0]
-            self._draw_text(ax, x-2, y, v, text, color)
+            if score is not None:
+                z_str = str(score).split(sep='.')
+                text = z_str[0] + '.' + z_str[1][0]
+                self._draw_text(ax, x-2, y, v, text, color)
             if self.show_box:
                 score = scores[i] if scores is not None else None
                 self._draw_box(ax, x, y, v, color, score)
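
Two guards arrive here: the skeleton is drawn with the raise_hand argument only when a hand is flagged, and the score label is rendered only when a score exists. One caveat: `raise_hand is not 'none'` compares identity against a string literal, which happens to work under CPython string interning but triggers a SyntaxWarning on Python 3.8+; `!=` is the conventional spelling. For the label itself, a self-contained sketch of the truncation the painter performs (one digit after the decimal point, no rounding; illustrative name):

    def score_label(score):
        if score is None:              # 'is None' is safe: None is a singleton
            return None
        z_str = str(score).split(sep='.')
        return z_str[0] + '.' + z_str[1][0]

    assert score_label(3.14159) == '3.1'
    assert score_label(None) is None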

View File

@@ -139,6 +139,7 @@ class Printer:
         fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
                                        figsize=(fig_width, fig_height))
         ax1.set_aspect(fig_ar_1)
+        fig.set_tight_layout(True)

         fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -194,7 +195,7 @@ class Printer:
     def social_distance_bird(self, axis, colors):
         draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')

-    def draw(self, figures, axes, image, dic_out, annotations):
+    def draw(self, figures, axes, image, dic_out=None, annotations=None):
         if self.args.activities:
             colors = ['deepskyblue' for _ in self.uv_heads]

View File

@@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/
 import time
 import os
+import logging

 import torch
 import matplotlib.pyplot as plt
 from PIL import Image
 import cv2

-from openpifpaf import decoder, network, visualizer, show
+from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
 from openpifpaf.predict import processor_factory, preprocess_factory

 from ..visuals import Printer
 from ..network import Loco
 from ..network.process import preprocess_pifpaf, factory_for_gt
+from ..predict import download_checkpoints

-OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'
+LOG = logging.getLogger(__name__)


 def factory_from_args(args):
     # Model
-    if not args.checkpoint:
-        if os.path.exists(OPENPIFPAF_PATH):
-            args.checkpoint = OPENPIFPAF_PATH
-        else:
-            args.checkpoint = 'shufflenetv2k30'
+    dic_models = download_checkpoints(args)
+    args.checkpoint = dic_models['keypoints']
+    logger.configure(args, LOG)  # logger first

     # Devices
     args.device = torch.device('cpu')
@@ -40,18 +40,20 @@ def factory_from_args(args):
     if torch.cuda.is_available():
         args.device = torch.device('cuda')
         args.pin_memory = True
+    LOG.debug('neural network device: %s', args.device)

     # Add visualization defaults
     args.figure_width = 10
     args.dpi_factor = 1.0
-    if args.net == 'monstereo':
-        args.batch_size = 2
-    else:
-        args.batch_size = 1
+    args.z_max = 10
+    args.show_all = True
+    args.no_save = True
+    args.batch_size = 1

     # Make default pifpaf argument
     args.force_complete_pose = True
+    LOG.info("Force complete pose is active")

     # Configure
     decoder.configure(args)
@@ -59,22 +61,24 @@ def factory_from_args(args):
     show.configure(args)
     visualizer.configure(args)

-    return args
+    return args, dic_models


 def webcam(args):
-    args = factory_from_args(args)
+    assert args.mode in ('mono')
+    args, dic_models = factory_from_args(args)

     # Load Models
-    net = Loco(model=args.model, net=args.net, device=args.device,
+    net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
                n_dropout=args.n_dropout, p_dropout=args.dropout)

-    processor, model = processor_factory(args)
+    processor, pifpaf_model = processor_factory(args)
     preprocess = preprocess_factory(args)

     # Start recording
     cam = cv2.VideoCapture(0)
-    visualizer_monstereo = None
+    visualizer_mono = None

     while True:
         start = time.time()
@@ -86,7 +90,7 @@ def webcam(args):
         pil_image = Image.fromarray(image)

         data = datasets.PilImageList(
-            make_list(pil_image), preprocess=preprocess)
+            [pil_image], preprocess=preprocess)

         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False,
@@ -94,7 +98,7 @@ def webcam(args):
         for (image_tensors_batch, _, meta_batch) in data_loader:
             pred_batch = processor.batch(
-                model, image_tensors_batch, device=args.device)
+                pifpaf_model, image_tensors_batch, device=args.device)

             for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
                 pred = [ann.inverse_transform(meta) for ann in pred]
@@ -104,8 +108,6 @@ def webcam(args):
                     'pred': pred,
                     'left': [ann.json_data() for ann in pred],
                     'image': image}
-                else:
-                    pifpaf_outs['right'] = [ann.json_data() for ann in pred]

         if not ret:
             break
@@ -114,10 +116,9 @@ def webcam(args):
             # ESC pressed
             print("Escape hit, closing...")
             break

         intrinsic_size = [xx * 1.3 for xx in pil_image.size]
-        kk, dic_gt = factory_for_gt(intrinsic_size,
-                                    focal_length=args.focal,
-                                    path_gt=args.path_gt)  # better intrinsics for mac camera
+        kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal)  # better intrinsics for mac camera
         boxes, keypoints = preprocess_pifpaf(
             pifpaf_outs['left'], (width, height))
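
The three-line call collapses into one and drops path_gt entirely: for webcam input the intrinsics are synthesized from the 1.3x-inflated image size rather than read from a ground-truth file. For context, a generic pinhole-calibration sketch; whether factory_for_gt builds exactly this matrix is an assumption, and focal_pixels below is a placeholder, not the semantics of args.focal:

    def pinhole_kk(width, height, focal_pixels):
        # Standard pinhole intrinsics: focal length in pixels on the diagonal,
        # principal point at the image center.
        return [[focal_pixels, 0.0, width / 2.0],
                [0.0, focal_pixels, height / 2.0],
                [0.0, 0.0, 1.0]]

    w, h = (xx * 1.3 for xx in (640, 480))  # the same 1.3x inflation as above
    kk = pinhole_kk(w, h, focal_pixels=w)   # illustrative focal value only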
@@ -129,13 +130,12 @@ def webcam(args):
                 dic_out = net.social_distance(dic_out, args)
             if 'raise_hand' in args.activities:
                 dic_out = net.raising_hand(dic_out, keypoints)

-        if visualizer_monstereo is None:  # it is, at the beginning
-            visualizer_monstereo = VisualizerMonstereo(kk,
-                                                       args)(pil_image)  # create it with the first image
-            visualizer_monstereo.send(None)
+        if visualizer_mono is None:  # it is, at the beginning
+            visualizer_mono = Visualizer(kk, args)(pil_image)  # create it with the first image
+            visualizer_mono.send(None)

         print(dic_out)
-        visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs))
+        visualizer_mono.send((pil_image, dic_out, pifpaf_outs))

         end = time.time()
         print("run-time: {:.2f} ms".format((end-start)*1000))
@@ -145,7 +145,7 @@ def webcam(args):
     cv2.destroyAllWindows()


-class VisualizerMonstereo:
+class Visualizer:
     def __init__(self, kk, args):
         self.kk = kk
         self.args = args
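
The renamed Visualizer keeps the consumer-coroutine protocol the loop above depends on: calling the instance with the first image returns a generator, send(None) primes it to its first yield, and every later send delivers one (pil_image, dic_out, pifpaf_outs) tuple. A self-contained sketch of the pattern, with illustrative names:

    class FrameConsumer:
        def __call__(self, first_image):
            # Real code would build the figures from the first frame here.
            while True:
                payload = yield            # blocks until .send(...) delivers a frame
                image, dic_out, pifpaf_outs = payload
                print('frame from', first_image, 'with', len(pifpaf_outs['left']), 'poses')

    consumer = FrameConsumer()('first-frame')   # calling the instance returns the generator
    consumer.send(None)                         # prime it up to the first yield
    consumer.send(('frame', {}, {'left': []}))  # one send per webcam frame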
@@ -189,8 +189,4 @@ def mypause(interval):
         canvas.draw_idle()
         canvas.start_event_loop(interval)
     else:
         time.sleep(interval)
-
-
-def make_list(*args):
-    return list(args)