Working webcam and raised-hand detection
parent ea63dd5781
commit 256102021a
@@ -72,18 +72,17 @@ def is_raising_hand(keypoint):
     l_hand = 9
     r_shoulder = 6
     r_hand = 10
-    h_offset = 10
+    l_ear = 3
+    r_ear = 4
+    h_offset = 20
 
-    if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and
-         keypoint[1][r_hand] < keypoint[1][r_shoulder]) and
-        (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and
-         keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'both'
 
-    if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]):
+    if keypoint[1][l_hand] < keypoint[1][l_shoulder]:
         return 'left'
 
-    if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]:
+    if keypoint[1][r_hand] < keypoint[1][r_shoulder]:
         return 'right'
 
     return 'none'
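The new check drops the horizontal-offset tests and keeps only the vertical wrist-above-shoulder comparison. A condensed, self-contained sketch of that logic (sample values are made up; keypoint[0] holds x coordinates, keypoint[1] holds y coordinates indexed by COCO keypoint ids — l_shoulder is presumably 5, following the COCO ordering — and a smaller y means higher in the image):

    # Condensed restatement of the simplified check above, for illustration only.
    L_SHOULDER, R_SHOULDER, L_HAND, R_HAND = 5, 6, 9, 10

    def raising_hand_sketch(keypoint):
        left_up = keypoint[1][L_HAND] < keypoint[1][L_SHOULDER]
        right_up = keypoint[1][R_HAND] < keypoint[1][R_SHOULDER]
        if left_up and right_up:
            return 'both'
        if left_up:
            return 'left'
        if right_up:
            return 'right'
        return 'none'

    # Toy 17-keypoint pose: only the shoulder and wrist indices matter here.
    xs = [0.0] * 17
    ys = [5.0] * 17
    ys[L_SHOULDER] = ys[R_SHOULDER] = 3.0
    ys[L_HAND] = 1.0  # left wrist above the left shoulder
    assert raising_hand_sketch([xs, ys]) == 'left'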
@@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out):
     r_h = 'none'
     if 'raise_hand' in args.activities:
         r_h = dic_out['raising_hand']
+        print("RAISE_HAND :", r_h)
 
     with image_canvas(image_t,
                       output_path + '.front.png',
@@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger
 from .visuals.printer import Printer
 from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
-from .activity import show_activities, show_social
+from .activity import show_activities
 
 LOG = logging.getLogger(__name__)
 
@@ -63,7 +63,7 @@ def download_checkpoints(args):
         assert not args.social_distance, "Social distance not supported in stereo modality"
         path = MONSTEREO_MODEL
         name = 'monstereo-201202-1212.pkl'
-    elif args.social_distance:
+    elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam:
         path = MONOLOCO_MODEL_NU
         name = 'monoloco_pp-201207-1350.pkl'
     else:
@@ -204,8 +204,10 @@ def predict(args):
         LOG.info("Prediction with MonoLoco++")
         dic_out = net.forward(keypoints, kk)
         dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-        if args.social_distance:
+        if args.social_distance or (args.activities and 'social_distance' in args.activities):
             dic_out = net.social_distance(dic_out, args)
+        if args.activities and 'raise_hand' in args.activities:
+            dic_out = net.raising_hand(dic_out, keypoints)
 
     else:
         LOG.info("Prediction with MonStereo")
@@ -47,18 +47,18 @@ def cli():
 
     # Monoloco
+    predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
-    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp')
-    predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
     predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
-    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
-                                default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--transform', help='transformation for the pose', default='None')
+    predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
+    predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
+    predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
+    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
+                                # default='data/arrays/names-kitti-200615-1022.json')
     predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
     predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
     predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
+    predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
+    predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')
 
     # Social distancing and social interactions
     predict_parser.add_argument('--social_distance', help='social', action='store_true')
     predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
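Because --activities uses nargs='+', argparse returns a list when the flag is given and None when it is absent, which is why the prediction code above guards with `args.activities and 'raise_hand' in args.activities`. A minimal sketch of that behaviour (the parser here is a stand-in, not the repo's):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--activities', nargs='+',
                        help='Choose activities to show: social_distance, raise_hand')

    args = parser.parse_args(['--activities', 'raise_hand', 'social_distance'])
    assert args.activities == ['raise_hand', 'social_distance']

    args = parser.parse_args([])
    assert args.activities is None  # hence the truthiness check before `in`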
@@ -128,10 +128,6 @@ def main():
     args = cli()
     if args.command == 'predict':
         if args.webcam:
-            if 'json' in args.output_types:
-                args.output_types = 'multi'
-            if args.z_max == 100:
-                args.z_max = 10
             from .visuals.webcam import webcam
             webcam(args)
         else:
@@ -200,11 +200,15 @@ class KeypointPainter(object):
             if isinstance(color, (int, np.integer)):
                 color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
 
-            self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            if raise_hand != 'none':
+                self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
+            else:
+                self._draw_skeleton(ax, x, y, v, color=color)
             score = scores[i] if scores is not None else None
-            z_str = str(score).split(sep='.')
-            text = z_str[0] + '.' + z_str[1][0]
-            self._draw_text(ax, x-2, y, v, text, color)
+            if score is not None:
+                z_str = str(score).split(sep='.')
+                text = z_str[0] + '.' + z_str[1][0]
+                self._draw_text(ax, x-2, y, v, text, color)
             if self.show_box:
                 score = scores[i] if scores is not None else None
                 self._draw_box(ax, x, y, v, color, score)
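The added `if score is not None` guard matters because the label is built by string-splitting the score; with no score, `str(None)` has no fractional part to index. The truncation itself keeps one decimal digit:

    score = 7.4631
    z_str = str(score).split(sep='.')
    text = z_str[0] + '.' + z_str[1][0]  # keep only the first decimal digit
    assert text == '7.4'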
@@ -139,6 +139,7 @@ class Printer:
 
         fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
                                        figsize=(fig_width, fig_height))
 
         ax1.set_aspect(fig_ar_1)
+        fig.set_tight_layout(True)
         fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
@@ -194,7 +195,7 @@ class Printer:
     def social_distance_bird(self, axis, colors):
         draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')
 
-    def draw(self, figures, axes, image, dic_out, annotations):
+    def draw(self, figures, axes, image, dic_out=None, annotations=None):
 
         if self.args.activities:
             colors = ['deepskyblue' for _ in self.uv_heads]
@@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/
 
 import time
-import os
+import logging
 
 import torch
 import matplotlib.pyplot as plt
 from PIL import Image
 import cv2
 
-from openpifpaf import decoder, network, visualizer, show
+from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
 from openpifpaf.predict import processor_factory, preprocess_factory
 
 from ..visuals import Printer
 from ..network import Loco
 from ..network.process import preprocess_pifpaf, factory_for_gt
+from ..predict import download_checkpoints
 
-OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'
+LOG = logging.getLogger(__name__)
 
 
 def factory_from_args(args):
 
     # Model
     if not args.checkpoint:
-        if os.path.exists(OPENPIFPAF_PATH):
-            args.checkpoint = OPENPIFPAF_PATH
-        else:
-            args.checkpoint = 'shufflenetv2k30'
+        dic_models = download_checkpoints(args)
+        args.checkpoint = dic_models['keypoints']
+
+    logger.configure(args, LOG)  # logger first
 
     # Devices
     args.device = torch.device('cpu')
@@ -40,18 +40,20 @@ def factory_from_args(args):
     if torch.cuda.is_available():
         args.device = torch.device('cuda')
         args.pin_memory = True
+    LOG.debug('neural network device: %s', args.device)
 
     # Add visualization defaults
     args.figure_width = 10
     args.dpi_factor = 1.0
 
-    if args.net == 'monstereo':
-        args.batch_size = 2
-    else:
-        args.batch_size = 1
+    args.z_max = 10
+    args.show_all = True
+    args.no_save = True
+    args.batch_size = 1
 
     # Make default pifpaf argument
     args.force_complete_pose = True
+    LOG.info("Force complete pose is active")
 
     # Configure
     decoder.configure(args)
@@ -59,22 +61,24 @@ def factory_from_args(args):
     show.configure(args)
     visualizer.configure(args)
 
-    return args
+    return args, dic_models
 
 
 def webcam(args):
 
+    assert args.mode in ('mono',)
+    args, dic_models = factory_from_args(args)
 
-    args = factory_from_args(args)
     # Load Models
-    net = Loco(model=args.model, net=args.net, device=args.device,
+    net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
                n_dropout=args.n_dropout, p_dropout=args.dropout)
 
-    processor, model = processor_factory(args)
+    processor, pifpaf_model = processor_factory(args)
     preprocess = preprocess_factory(args)
 
     # Start recording
     cam = cv2.VideoCapture(0)
-    visualizer_monstereo = None
+    visualizer_mono = None
 
     while True:
         start = time.time()
@@ -86,7 +90,7 @@ def webcam(args):
         pil_image = Image.fromarray(image)
 
         data = datasets.PilImageList(
-            make_list(pil_image), preprocess=preprocess)
+            [pil_image], preprocess=preprocess)
 
         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False,
@@ -94,7 +98,7 @@ def webcam(args):
         for (image_tensors_batch, _, meta_batch) in data_loader:
             pred_batch = processor.batch(
-                model, image_tensors_batch, device=args.device)
+                pifpaf_model, image_tensors_batch, device=args.device)
 
             for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
                 pred = [ann.inverse_transform(meta) for ann in pred]
@@ -104,8 +108,6 @@ def webcam(args):
                         'pred': pred,
                         'left': [ann.json_data() for ann in pred],
                         'image': image}
-                else:
-                    pifpaf_outs['right'] = [ann.json_data() for ann in pred]
 
         if not ret:
             break
@@ -114,10 +116,9 @@ def webcam(args):
             # ESC pressed
             print("Escape hit, closing...")
             break
 
         intrinsic_size = [xx * 1.3 for xx in pil_image.size]
-        kk, dic_gt = factory_for_gt(intrinsic_size,
-                                    focal_length=args.focal,
-                                    path_gt=args.path_gt)  # better intrinsics for mac camera
+        kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal)  # better intrinsics for mac camera
         boxes, keypoints = preprocess_pifpaf(
             pifpaf_outs['left'], (width, height))
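The intrinsics are rebuilt each frame from a scaled image size and a focal-length argument. As a rough idea of what such a factory can produce — a generic pinhole matrix, not necessarily what factory_for_gt actually computes — consider:

    # Generic pinhole sketch (an assumption, not monoloco's formula):
    # focal length in pixels on the diagonal, principal point at the centre.
    def simple_intrinsics(width, height, focal_px):
        return [[focal_px, 0.0, width / 2.0],
                [0.0, focal_px, height / 2.0],
                [0.0, 0.0, 1.0]]

    kk = simple_intrinsics(640 * 1.3, 480 * 1.3, 600.0)  # made-up numbers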
@@ -129,13 +130,12 @@ def webcam(args):
             dic_out = net.social_distance(dic_out, args)
         if 'raise_hand' in args.activities:
             dic_out = net.raising_hand(dic_out, keypoints)
-        if visualizer_monstereo is None:  # it is, at the beginning
-            visualizer_monstereo = VisualizerMonstereo(kk,
-                                                       args)(pil_image)  # create it with the first image
-            visualizer_monstereo.send(None)
+        if visualizer_mono is None:  # it is, at the beginning
+            visualizer_mono = Visualizer(kk, args)(pil_image)  # create it with the first image
+            visualizer_mono.send(None)
 
         print(dic_out)
-        visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs))
+        visualizer_mono.send((pil_image, dic_out, pifpaf_outs))
 
         end = time.time()
         print("run-time: {:.2f} ms".format((end-start)*1000))
@@ -145,7 +145,7 @@ def webcam(args):
     cv2.destroyAllWindows()
 
 
-class VisualizerMonstereo:
+class Visualizer:
     def __init__(self, kk, args):
         self.kk = kk
         self.args = args
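Visualizer is used as a coroutine: calling the instance returns a generator, `send(None)` primes it, and each loop iteration pushes a frame with `send((pil_image, dic_out, pifpaf_outs))`. A stripped-down sketch of the same pattern (class name and the drawing body are placeholders):

    class ToyVisualizer:
        def __init__(self, kk, args):
            self.kk = kk
            self.args = args

        def __call__(self, first_image):
            # Because of the `yield`, calling the instance returns a generator.
            while True:
                payload = yield
                if payload is None:
                    break
                image, dic_out, pifpaf_outs = payload
                print('drawing frame with', len(pifpaf_outs['left']), 'poses')

    viz = ToyVisualizer(kk=None, args=None)('first frame')
    viz.send(None)                           # prime the generator
    viz.send(('frame', {}, {'left': []}))    # one webcam iteration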
@@ -189,8 +189,4 @@ def mypause(interval):
         canvas.draw_idle()
         canvas.start_event_loop(interval)
     else:
-        time.sleep(interval)
-
-
-def make_list(*args):
-    return list(args)
+        time.sleep(interval)