Working webcam and raised hand detection
This commit is contained in:
parent
ea63dd5781
commit
256102021a
@ -72,18 +72,17 @@ def is_raising_hand(keypoint):
|
|||||||
l_hand = 9
|
l_hand = 9
|
||||||
r_shoulder = 6
|
r_shoulder = 6
|
||||||
r_hand = 10
|
r_hand = 10
|
||||||
h_offset = 10
|
l_ear = 3
|
||||||
|
r_ear = 4
|
||||||
|
h_offset = 20
|
||||||
|
|
||||||
if ((keypoint[1][l_hand] < keypoint[1][l_shoulder] and
|
if keypoint[1][l_hand] < keypoint[1][l_shoulder] and keypoint[1][r_hand] < keypoint[1][r_shoulder]:
|
||||||
keypoint[1][r_hand] < keypoint[1][r_shoulder]) and
|
|
||||||
(keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder] and
|
|
||||||
keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder])):
|
|
||||||
return 'both'
|
return 'both'
|
||||||
|
|
||||||
if (keypoint[1][l_hand] < keypoint[1][l_shoulder]) and (keypoint[0][l_hand] - h_offset > keypoint[0][l_shoulder]):
|
if keypoint[1][l_hand] < keypoint[1][l_shoulder]:
|
||||||
return 'left'
|
return 'left'
|
||||||
|
|
||||||
if keypoint[1][r_hand] < keypoint[1][r_shoulder] and keypoint[0][r_hand] + h_offset < keypoint[0][r_shoulder]:
|
if keypoint[1][r_hand] < keypoint[1][r_shoulder]:
|
||||||
return 'right'
|
return 'right'
|
||||||
|
|
||||||
return 'none'
|
return 'none'
|
||||||
@ -157,6 +156,7 @@ def show_activities(args, image_t, output_path, annotations, dic_out):
|
|||||||
r_h = 'none'
|
r_h = 'none'
|
||||||
if 'raise_hand' in args.activities:
|
if 'raise_hand' in args.activities:
|
||||||
r_h = dic_out['raising_hand']
|
r_h = dic_out['raising_hand']
|
||||||
|
print("RAISE_HAND :", r_h)
|
||||||
|
|
||||||
with image_canvas(image_t,
|
with image_canvas(image_t,
|
||||||
output_path + '.front.png',
|
output_path + '.front.png',
|
||||||
|
|||||||
@ -21,7 +21,7 @@ from openpifpaf import decoder, network, visualizer, show, logger
|
|||||||
from .visuals.printer import Printer
|
from .visuals.printer import Printer
|
||||||
from .network import Loco
|
from .network import Loco
|
||||||
from .network.process import factory_for_gt, preprocess_pifpaf
|
from .network.process import factory_for_gt, preprocess_pifpaf
|
||||||
from .activity import show_activities, show_social
|
from .activity import show_activities
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -63,7 +63,7 @@ def download_checkpoints(args):
|
|||||||
assert not args.social_distance, "Social distance not supported in stereo modality"
|
assert not args.social_distance, "Social distance not supported in stereo modality"
|
||||||
path = MONSTEREO_MODEL
|
path = MONSTEREO_MODEL
|
||||||
name = 'monstereo-201202-1212.pkl'
|
name = 'monstereo-201202-1212.pkl'
|
||||||
elif args.social_distance:
|
elif args.social_distance or (args.activities and 'social_distance' in args.activities) or args.webcam:
|
||||||
path = MONOLOCO_MODEL_NU
|
path = MONOLOCO_MODEL_NU
|
||||||
name = 'monoloco_pp-201207-1350.pkl'
|
name = 'monoloco_pp-201207-1350.pkl'
|
||||||
else:
|
else:
|
||||||
@ -204,8 +204,10 @@ def predict(args):
|
|||||||
LOG.info("Prediction with MonoLoco++")
|
LOG.info("Prediction with MonoLoco++")
|
||||||
dic_out = net.forward(keypoints, kk)
|
dic_out = net.forward(keypoints, kk)
|
||||||
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
||||||
if args.social_distance:
|
if args.social_distance or (args.activities and 'social_distance' in args.activities):
|
||||||
dic_out = net.social_distance(dic_out, args)
|
dic_out = net.social_distance(dic_out, args)
|
||||||
|
if args.activities and 'raise_hand' in args.activities:
|
||||||
|
dic_out = net.raising_hand(dic_out, keypoints)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
LOG.info("Prediction with MonStereo")
|
LOG.info("Prediction with MonStereo")
|
||||||
|
|||||||
@ -47,18 +47,18 @@ def cli():
|
|||||||
|
|
||||||
# Monoloco
|
# Monoloco
|
||||||
predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
|
predict_parser.add_argument('--activities', nargs='+', help='Choose activities to show: social_distance, raise_hand')
|
||||||
predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo', default='monoloco_pp')
|
predict_parser.add_argument('--mode', help='keypoints, mono, stereo', default='mono')
|
||||||
predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
|
predict_parser.add_argument('--model', help='path of MonoLoco/MonStereo model to load')
|
||||||
predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
|
predict_parser.add_argument('--net', help='only to select older MonoLoco model, otherwise use --mode')
|
||||||
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
|
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization')
|
||||||
default='data/arrays/names-kitti-200615-1022.json')
|
#default='data/arrays/names-kitti-200615-1022.json')
|
||||||
predict_parser.add_argument('--transform', help='transformation for the pose', default='None')
|
|
||||||
predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
|
predict_parser.add_argument('--z_max', type=int, help='maximum meters distance for predictions', default=100)
|
||||||
predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
|
predict_parser.add_argument('--n_dropout', type=int, help='Epistemic uncertainty evaluation', default=0)
|
||||||
predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
|
predict_parser.add_argument('--dropout', type=float, help='dropout parameter', default=0.2)
|
||||||
predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
|
predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
|
||||||
predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
|
predict_parser.add_argument('--webcam', help='monstereo streaming', action='store_true')
|
||||||
predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')
|
predict_parser.add_argument('--scale', default=0.2, type=float, help='change the scale of the webcam image')
|
||||||
|
|
||||||
# Social distancing and social interactions
|
# Social distancing and social interactions
|
||||||
predict_parser.add_argument('--social_distance', help='social', action='store_true')
|
predict_parser.add_argument('--social_distance', help='social', action='store_true')
|
||||||
predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
|
predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
|
||||||
@ -128,10 +128,6 @@ def main():
|
|||||||
args = cli()
|
args = cli()
|
||||||
if args.command == 'predict':
|
if args.command == 'predict':
|
||||||
if args.webcam:
|
if args.webcam:
|
||||||
if 'json'in args.output_types:
|
|
||||||
args.output_types = 'multi'
|
|
||||||
if args.z_max == 100:
|
|
||||||
args.z_max = 10
|
|
||||||
from .visuals.webcam import webcam
|
from .visuals.webcam import webcam
|
||||||
webcam(args)
|
webcam(args)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -200,11 +200,15 @@ class KeypointPainter(object):
|
|||||||
if isinstance(color, (int, np.integer)):
|
if isinstance(color, (int, np.integer)):
|
||||||
color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
|
color = matplotlib.cm.get_cmap('tab20')((color % 20 + 0.05) / 20)
|
||||||
|
|
||||||
self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
|
if raise_hand is not 'none':
|
||||||
|
self._draw_skeleton(ax, x, y, v, color=color, raise_hand=raise_hand[:][i])
|
||||||
|
else:
|
||||||
|
self._draw_skeleton(ax, x, y, v, color=color)
|
||||||
score = scores[i] if scores is not None else None
|
score = scores[i] if scores is not None else None
|
||||||
z_str = str(score).split(sep='.')
|
if score is not None:
|
||||||
text = z_str[0] + '.' + z_str[1][0]
|
z_str = str(score).split(sep='.')
|
||||||
self._draw_text(ax, x-2, y, v, text, color)
|
text = z_str[0] + '.' + z_str[1][0]
|
||||||
|
self._draw_text(ax, x-2, y, v, text, color)
|
||||||
if self.show_box:
|
if self.show_box:
|
||||||
score = scores[i] if scores is not None else None
|
score = scores[i] if scores is not None else None
|
||||||
self._draw_box(ax, x, y, v, color, score)
|
self._draw_box(ax, x, y, v, color, score)
|
||||||
|
|||||||
@ -139,6 +139,7 @@ class Printer:
|
|||||||
|
|
||||||
fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
|
fig, (ax0, ax1) = plt.subplots(1, 2, sharey=False, gridspec_kw={'width_ratios': [width_ratio, 1]},
|
||||||
figsize=(fig_width, fig_height))
|
figsize=(fig_width, fig_height))
|
||||||
|
|
||||||
ax1.set_aspect(fig_ar_1)
|
ax1.set_aspect(fig_ar_1)
|
||||||
fig.set_tight_layout(True)
|
fig.set_tight_layout(True)
|
||||||
fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
|
fig.subplots_adjust(left=0.02, right=0.98, bottom=0, top=1, hspace=0, wspace=0.02)
|
||||||
@ -194,7 +195,7 @@ class Printer:
|
|||||||
def social_distance_bird(self, axis, colors):
|
def social_distance_bird(self, axis, colors):
|
||||||
draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')
|
draw_orientation(axis, self.xz_centers, [], self.angles, colors, mode='bird')
|
||||||
|
|
||||||
def draw(self, figures, axes, image, dic_out, annotations):
|
def draw(self, figures, axes, image, dic_out=None, annotations=None):
|
||||||
|
|
||||||
if self.args.activities:
|
if self.args.activities:
|
||||||
colors = ['deepskyblue' for _ in self.uv_heads]
|
colors = ['deepskyblue' for _ in self.uv_heads]
|
||||||
|
|||||||
@ -8,31 +8,31 @@ Implementation adapted from https://github.com/vita-epfl/openpifpaf/blob/master/
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import cv2
|
import cv2
|
||||||
|
|
||||||
from openpifpaf import decoder, network, visualizer, show
|
from openpifpaf import decoder, network, visualizer, show, logger
|
||||||
import openpifpaf.datasets as datasets
|
import openpifpaf.datasets as datasets
|
||||||
from openpifpaf.predict import processor_factory, preprocess_factory
|
from openpifpaf.predict import processor_factory, preprocess_factory
|
||||||
|
|
||||||
from ..visuals import Printer
|
from ..visuals import Printer
|
||||||
from ..network import Loco
|
from ..network import Loco
|
||||||
from ..network.process import preprocess_pifpaf, factory_for_gt
|
from ..network.process import preprocess_pifpaf, factory_for_gt
|
||||||
|
from ..predict import download_checkpoints
|
||||||
|
|
||||||
OPENPIFPAF_PATH = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def factory_from_args(args):
|
def factory_from_args(args):
|
||||||
|
|
||||||
# Model
|
# Model
|
||||||
if not args.checkpoint:
|
dic_models = download_checkpoints(args)
|
||||||
if os.path.exists(OPENPIFPAF_PATH):
|
args.checkpoint = dic_models['keypoints']
|
||||||
args.checkpoint = OPENPIFPAF_PATH
|
|
||||||
else:
|
logger.configure(args, LOG) # logger first
|
||||||
args.checkpoint = 'shufflenetv2k30'
|
|
||||||
|
|
||||||
# Devices
|
# Devices
|
||||||
args.device = torch.device('cpu')
|
args.device = torch.device('cpu')
|
||||||
@ -40,18 +40,20 @@ def factory_from_args(args):
|
|||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
args.device = torch.device('cuda')
|
args.device = torch.device('cuda')
|
||||||
args.pin_memory = True
|
args.pin_memory = True
|
||||||
|
LOG.debug('neural network device: %s', args.device)
|
||||||
|
|
||||||
# Add visualization defaults
|
# Add visualization defaults
|
||||||
args.figure_width = 10
|
args.figure_width = 10
|
||||||
args.dpi_factor = 1.0
|
args.dpi_factor = 1.0
|
||||||
|
|
||||||
if args.net == 'monstereo':
|
args.z_max = 10
|
||||||
args.batch_size = 2
|
args.show_all = True
|
||||||
else:
|
args.no_save = True
|
||||||
args.batch_size = 1
|
args.batch_size = 1
|
||||||
|
|
||||||
# Make default pifpaf argument
|
# Make default pifpaf argument
|
||||||
args.force_complete_pose = True
|
args.force_complete_pose = True
|
||||||
|
LOG.info("Force complete pose is active")
|
||||||
|
|
||||||
# Configure
|
# Configure
|
||||||
decoder.configure(args)
|
decoder.configure(args)
|
||||||
@ -59,22 +61,24 @@ def factory_from_args(args):
|
|||||||
show.configure(args)
|
show.configure(args)
|
||||||
visualizer.configure(args)
|
visualizer.configure(args)
|
||||||
|
|
||||||
return args
|
return args, dic_models
|
||||||
|
|
||||||
|
|
||||||
def webcam(args):
|
def webcam(args):
|
||||||
|
|
||||||
|
assert args.mode in ('mono')
|
||||||
|
args, dic_models = factory_from_args(args)
|
||||||
|
|
||||||
args = factory_from_args(args)
|
|
||||||
# Load Models
|
# Load Models
|
||||||
net = Loco(model=args.model, net=args.net, device=args.device,
|
net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
|
||||||
n_dropout=args.n_dropout, p_dropout=args.dropout)
|
n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||||
|
|
||||||
processor, model = processor_factory(args)
|
processor, pifpaf_model = processor_factory(args)
|
||||||
preprocess = preprocess_factory(args)
|
preprocess = preprocess_factory(args)
|
||||||
|
|
||||||
# Start recording
|
# Start recording
|
||||||
cam = cv2.VideoCapture(0)
|
cam = cv2.VideoCapture(0)
|
||||||
visualizer_monstereo = None
|
visualizer_mono = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
@ -86,7 +90,7 @@ def webcam(args):
|
|||||||
pil_image = Image.fromarray(image)
|
pil_image = Image.fromarray(image)
|
||||||
|
|
||||||
data = datasets.PilImageList(
|
data = datasets.PilImageList(
|
||||||
make_list(pil_image), preprocess=preprocess)
|
[pil_image], preprocess=preprocess)
|
||||||
|
|
||||||
data_loader = torch.utils.data.DataLoader(
|
data_loader = torch.utils.data.DataLoader(
|
||||||
data, batch_size=1, shuffle=False,
|
data, batch_size=1, shuffle=False,
|
||||||
@ -94,7 +98,7 @@ def webcam(args):
|
|||||||
|
|
||||||
for (image_tensors_batch, _, meta_batch) in data_loader:
|
for (image_tensors_batch, _, meta_batch) in data_loader:
|
||||||
pred_batch = processor.batch(
|
pred_batch = processor.batch(
|
||||||
model, image_tensors_batch, device=args.device)
|
pifpaf_model, image_tensors_batch, device=args.device)
|
||||||
|
|
||||||
for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
|
for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
|
||||||
pred = [ann.inverse_transform(meta) for ann in pred]
|
pred = [ann.inverse_transform(meta) for ann in pred]
|
||||||
@ -104,8 +108,6 @@ def webcam(args):
|
|||||||
'pred': pred,
|
'pred': pred,
|
||||||
'left': [ann.json_data() for ann in pred],
|
'left': [ann.json_data() for ann in pred],
|
||||||
'image': image}
|
'image': image}
|
||||||
else:
|
|
||||||
pifpaf_outs['right'] = [ann.json_data() for ann in pred]
|
|
||||||
|
|
||||||
if not ret:
|
if not ret:
|
||||||
break
|
break
|
||||||
@ -114,10 +116,9 @@ def webcam(args):
|
|||||||
# ESC pressed
|
# ESC pressed
|
||||||
print("Escape hit, closing...")
|
print("Escape hit, closing...")
|
||||||
break
|
break
|
||||||
|
|
||||||
intrinsic_size = [xx * 1.3 for xx in pil_image.size]
|
intrinsic_size = [xx * 1.3 for xx in pil_image.size]
|
||||||
kk, dic_gt = factory_for_gt(intrinsic_size,
|
kk, dic_gt = factory_for_gt(intrinsic_size, focal_length=args.focal) # better intrinsics for mac camera
|
||||||
focal_length=args.focal,
|
|
||||||
path_gt=args.path_gt) # better intrinsics for mac camera
|
|
||||||
boxes, keypoints = preprocess_pifpaf(
|
boxes, keypoints = preprocess_pifpaf(
|
||||||
pifpaf_outs['left'], (width, height))
|
pifpaf_outs['left'], (width, height))
|
||||||
|
|
||||||
@ -129,13 +130,12 @@ def webcam(args):
|
|||||||
dic_out = net.social_distance(dic_out, args)
|
dic_out = net.social_distance(dic_out, args)
|
||||||
if 'raise_hand' in args.activities:
|
if 'raise_hand' in args.activities:
|
||||||
dic_out = net.raising_hand(dic_out, keypoints)
|
dic_out = net.raising_hand(dic_out, keypoints)
|
||||||
if visualizer_monstereo is None: # it is, at the beginning
|
if visualizer_mono is None: # it is, at the beginning
|
||||||
visualizer_monstereo = VisualizerMonstereo(kk,
|
visualizer_mono = Visualizer(kk, args)(pil_image) # create it with the first image
|
||||||
args)(pil_image) # create it with the first image
|
visualizer_mono.send(None)
|
||||||
visualizer_monstereo.send(None)
|
|
||||||
|
|
||||||
print(dic_out)
|
print(dic_out)
|
||||||
visualizer_monstereo.send((pil_image, dic_out, pifpaf_outs))
|
visualizer_mono.send((pil_image, dic_out, pifpaf_outs))
|
||||||
|
|
||||||
end = time.time()
|
end = time.time()
|
||||||
print("run-time: {:.2f} ms".format((end-start)*1000))
|
print("run-time: {:.2f} ms".format((end-start)*1000))
|
||||||
@ -145,7 +145,7 @@ def webcam(args):
|
|||||||
cv2.destroyAllWindows()
|
cv2.destroyAllWindows()
|
||||||
|
|
||||||
|
|
||||||
class VisualizerMonstereo:
|
class Visualizer:
|
||||||
def __init__(self, kk, args):
|
def __init__(self, kk, args):
|
||||||
self.kk = kk
|
self.kk = kk
|
||||||
self.args = args
|
self.args = args
|
||||||
@ -189,8 +189,4 @@ def mypause(interval):
|
|||||||
canvas.draw_idle()
|
canvas.draw_idle()
|
||||||
canvas.start_event_loop(interval)
|
canvas.start_event_loop(interval)
|
||||||
else:
|
else:
|
||||||
time.sleep(interval)
|
time.sleep(interval)
|
||||||
|
|
||||||
|
|
||||||
def make_list(*args):
|
|
||||||
return list(args)
|
|
||||||
Loading…
Reference in New Issue
Block a user