Working webcam and raised-hand detection

charlesbvll 2021-03-28 15:10:38 +02:00
parent ea63dd5781
commit 256102021a
6 changed files with 59 additions and 60 deletions

View File

@@ -72,18 +72,17 @@ def is_raising_hand(keypoint):
     l_hand = 9
     r_shoulder = 6
     r_hand = 10
-    h_offset = 10
+    l_ear = 3
+    r_ear = 4
+    h_offset = 20
-    if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and
-         keypoint[1][r_hand] < keypoint[1][r_shoulder]) and
-        (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and
-         keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'both'
-    if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder]:
         return 'left'
-    if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]:
+    if keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'right'
     return 'none'
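
The new rule keeps only the vertical wrist-versus-shoulder comparison and drops the old horizontal-offset check. A minimal runnable sketch of that simplified logic, assuming COCO keypoint indexing (5/6 = shoulders, 9/10 = wrists; l_shoulder = 5 is cut off above this hunk) and keypoint[0]/keypoint[1] holding the x and y rows — image y grows downward, so a raised wrist has a smaller y than the shoulder:

    # Minimal sketch of the simplified rule, under the assumptions above.
    def is_raising_hand(keypoint):
        l_shoulder, r_shoulder = 5, 6
        l_hand, r_hand = 9, 10
        left_up = keypoint[1][l_hand] < keypoint[1][l_shoulder]
        right_up = keypoint[1][r_hand] < keypoint[1][r_shoulder]
        if left_up and right_up:
            return 'both'
        if left_up:
            return 'left'
        if right_up:
            return 'right'
        return 'none'

    # Example: 17-joint pose with only the left wrist above the shoulders.
    xs, ys = [0.0] * 17, [200.0] * 17
    ys[5] = ys[6] = 100.0          # shoulders
    ys[9], ys[10] = 60.0, 150.0    # left wrist up, right wrist down
    print(is_raising_hand([xs, ys]))  # -> 'left'
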
@@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out):
     r_h = 'none'
     if 'raise_hand' in args.activities:
         r_h = dic_out['raising_hand']
+        print("RAISE_HAND :", r_h)
     with image_canvas(image_t,
                       output_path + '.front.png',

View File

@@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger
 from .visuals.printer import Printer
 from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
-from .activity import show_activities, show_social
+from .activity import show_activities
 LOG = logging.getLogger(__name__)
@@ -63,7 +63,7 @@ def download_checkpoints(args):
         assert not args.social_distance, "Social distance not supported in stereo modality"
         path = MONSTEREO_MODEL
         name = 'monstereo-201202-1212.pkl'
-    elif args.social_distance:
+    elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam:
         path = MONOLOCO_MODEL_NU
         name = 'monoloco_pp-201207-1350.pkl'
     else:
@@ -204,8 +204,10 @@ def predict(args):
         LOG.info("Prediction with MonoLoco++")
         dic_out = net.forward(keypoints, kk)
         dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-        if args.social_distance:
+        if args.social_distance or (args.activities and 'social_distance' in args.activities):
             dic_out = net.social_distance(dic_out, args)
+        if args.activities and 'raise_hand' in args.activities:
+            dic_out = net.raising_hand(dic_out, keypoints)
     else:
         LOG.info("Prediction with MonStereo")

View File

@@ -47,18 +47,18 @@ def cli():
     # Monoloco
     predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
-    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp')
-    predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
     predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
-    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
-                                default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--transform', help='transformation for the pose', default='None')
     predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
+    predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
+    predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
+    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
+    #default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
     predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
     predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')
     # Social distancing and social interactions
     predict_parser.add_argument('--social_distance', help='social', action='store_true')
     predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
@@ -128,10 +128,6 @@ def main():
     args = cli()
     if args.command == 'predict':
         if args.webcam:
-            if 'json' in args.output_types:
-                args.output_types = 'multi'
-            if args.z_max == 100:
-                args.z_max = 10
             from .visuals.webcam import webcam
             webcam(args)
         else:

View File

@@ -200,11 +200,15 @@ class KeypointPainter(object):
             if isinstance(color, (int, np.integer)):
                 color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
-            self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            if raise_hand != 'none':
+                self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            else:
+                self._draw_skeleton(ax, x, y, v, color=color)
             score = scores[i] if scores is not None else None
-            z_str = str(score).split(sep='.')
-            text = z_str[0] + '.' + z_str[1][0]
-            self._draw_text(ax, x-2, y, v, text, color)
+            if score is not None:
+                z_str = str(score).split(sep='.')
+                text = z_str[0] + '.' + z_str[1][0]
+                self._draw_text(ax, x-2, y, v, text, color)
             if self.show_box:
                 score = scores[i] if scores is not None else None
                 self._draw_box(ax, x, y, v, color, score)
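
The added `if score is not None` guard prevents a crash when no scores are passed in (note that the branch condition compares strings with `!=`; using `is not` against a string literal relies on interning and raises a SyntaxWarning on recent Pythons). A side note on the label itself: building the one-decimal string by splitting on '.' truncates, and a format specifier is the usual, shorter alternative, at the cost of rounding instead:

    score = 2.36
    z_str = str(score).split(sep='.')
    print(z_str[0] + '.' + z_str[1][0])  # '2.3' (truncates)
    print('{:.1f}'.format(score))        # '2.4' (rounds)
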

View File

@@ -139,6 +139,7 @@ class Printer:
         fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
                                        figsize=(fig_width, fig_height))
         ax1.set_aspect(fig_ar_1)
+        fig.set_tight_layout(True)
         fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
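
One caveat with the added line: tight layout recomputes the subplot parameters at every draw, so it effectively overrides the manual subplots_adjust values on the next line (newer Matplotlib versions warn that the two are incompatible and skip subplots_adjust). Normally one mechanism or the other is enough:

    import matplotlib.pyplot as plt

    fig, (ax0, ax1) = plt.subplots(1, 2)
    fig.set_tight_layout(True)           # let matplotlib manage the spacing...
    # fig.subplots_adjust(wspace=0.02)   # ...or tune it by hand, but not both
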
@@ -194,7 +195,7 @@
     def social_distance_bird(self, axis, colors):
         draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')
-    def draw(self, figures, axes, image, dic_out, annotations):
+    def draw(self, figures, axes, image, dic_out=None, annotations=None):
         if self.args.activities:
             colors = ['deepskyblue' for _ in self.uv_heads]

View File

@@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/
 import time
-import os
 import logging
 import torch
 import matplotlib.pyplot as plt
 from PIL import Image
 import cv2
-from openpifpaf import decoder, network, visualizer, show
+from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
 from openpifpaf.predict import processor_factory, preprocess_factory
 from ..visuals import Printer
 from ..network import Loco
 from ..network.process import preprocess_pifpaf, factory_for_gt
+from ..predict import download_checkpoints
-OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'
 LOG = logging.getLogger(__name__)
 def factory_from_args(args):
     # Model
-    if not args.checkpoint:
-        if os.path.exists(OPENPIFPAF_PATH):
-            args.checkpoint = OPENPIFPAF_PATH
-        else:
-            args.checkpoint = 'shufflenetv2k30'
+    dic_models = download_checkpoints(args)
+    args.checkpoint = dic_models['keypoints']
+    logger.configure(args, LOG) # logger first
     # Devices
     args.device = torch.device('cpu')
@@ -40,18 +40,20 @@ def factory_from_args(args):
     if torch.cuda.is_available():
         args.device = torch.device('cuda')
         args.pin_memory = True
     LOG.debug('neural network device: %s', args.device)
     # Add visualization defaults
     args.figure_width = 10
     args.dpi_factor = 1.0
-    if args.net == 'monstereo':
-        args.batch_size = 2
-    else:
-        args.batch_size = 1
+    args.z_max = 10
+    args.show_all = True
+    args.no_save = True
+    args.batch_size = 1
     # Make default pifpaf argument
     args.force_complete_pose = True
     LOG.info("Force complete pose is active")
     # Configure
     decoder.configure(args)
@@ -59,22 +61,24 @@ def factory_from_args(args):
     show.configure(args)
     visualizer.configure(args)
-    return args
+    return args, dic_models
 def webcam(args):
-    args = factory_from_args(args)
+    assert args.mode in ('mono',)
+    args, dic_models = factory_from_args(args)
     # Load Models
-    net = Loco(model=args.model, net=args.net, device=args.device,
+    net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
                n_dropout=args.n_dropout, p_dropout=args.dropout)
-    processor, model = processor_factory(args)
+    processor, pifpaf_model = processor_factory(args)
     preprocess = preprocess_factory(args)
     # Start recording
     cam = cv2.VideoCapture(0)
-    visualizer_monstereo = None
+    visualizer_mono = None
     while True:
         start = time.time()
start = time.time()
@@ -86,7 +90,7 @@ def webcam(args):
         pil_image = Image.fromarray(image)
         data = datasets.PilImageList(
-            make_list(pil_image), preprocess=preprocess)
+            [pil_image], preprocess=preprocess)
         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False,
@@ -94,7 +98,7 @@ def webcam(args):
         for (image_tensors_batch, _, meta_batch) in data_loader:
             pred_batch = processor.batch(
-                model, image_tensors_batch, device=args.device)
+                pifpaf_model, image_tensors_batch, device=args.device)
             for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
                 pred = [ann.inverse_transform(meta) for ann in pred]
@@ -104,8 +108,6 @@ def webcam(args):
                         'pred': pred,
                         'left': [ann.json_data() for ann in pred],
                         'image': image}
-                else:
-                    pifpaf_outs['right'] = [ann.json_data() for ann in pred]
         if not ret:
             break
@@ -114,10 +116,9 @@ def webcam(args):
             # ESC pressed
             print("Escape hit, closing...")
             break
         intrinsic_size = [xx * 1.3 for xx in pil_image.size]
-        kk, dic_gt = factory_for_gt(intrinsic_size,
-                                    focal_length=args.focal,
-                                    path_gt=args.path_gt) # better intrinsics for mac camera
+        kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal) # better intrinsics for mac camera
         boxes, keypoints = preprocess_pifpaf(
             pifpaf_outs['left'], (width, height))
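
The internals of factory_for_gt are outside this diff; called without path_gt it presumably falls back to synthetic pinhole intrinsics derived from the (inflated) image size, which is why the 1.3 scale factor above acts as a crude calibration for the mac camera. A hypothetical sketch of such a fallback (simple_intrinsics and its pixel-unit focal length are illustrative, not the library's API):

    def simple_intrinsics(size, focal_px):
        # Pinhole camera matrix: focal on the diagonal, principal point at the center
        width, height = size
        return [[focal_px, 0.0, width / 2.0],
                [0.0, focal_px, height / 2.0],
                [0.0, 0.0, 1.0]]

    kk = simple_intrinsics([640 * 1.3, 480 * 1.3], focal_px=720.0)
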
@@ -129,13 +130,12 @@ def webcam(args):
             dic_out = net.social_distance(dic_out, args)
         if 'raise_hand' in args.activities:
             dic_out = net.raising_hand(dic_out, keypoints)
-        if visualizer_monstereo is None: # it is, at the beginning
-            visualizer_monstereo = VisualizerMonstereo(kk,
-                                                       args)(pil_image) # create it with the first image
-            visualizer_monstereo.send(None)
+        if visualizer_mono is None: # it is, at the beginning
+            visualizer_mono = Visualizer(kk, args)(pil_image) # create it with the first image
+            visualizer_mono.send(None)
         print(dic_out)
-        visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs))
+        visualizer_mono.send((pil_image, dic_out, pifpaf_outs))
         end = time.time()
         print("run-time: {:.2f} ms".format((end-start)*1000))
@@ -145,7 +145,7 @@ def webcam(args):
     cv2.destroyAllWindows()
-class VisualizerMonstereo:
+class Visualizer:
     def __init__(self, kk, args):
         self.kk = kk
         self.args = args
@@ -189,8 +189,4 @@ def mypause(interval):
         canvas.draw_idle()
         canvas.start_event_loop(interval)
     else:
-        time.sleep(interval)
-def make_list(*args):
-    return list(args)
+        time.sleep(interval)