diff --git a/.pylintrc b/.pylintrc
index bce16c1..5167ba1 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -9,7 +9,9 @@ Good-names=xx,dd,zz,hh,ww,pp,kk,lr,w1,w2,w3,mm,im,uv,ax,COV_MIN,CONF_MIN
 
 [TYPECHECK]
 
-disable=import-error,invalid-name,unused-variable,fixme,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation
+disable=import-error,invalid-name,unused-variable,
+        E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,
+        too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation
 
 # List of members which are set dynamically and missed by pylint inference
diff --git a/docs/MonoLoco++.md b/docs/MonoLoco++.md
index 8281af0..9c867fb 100644
--- a/docs/MonoLoco++.md
+++ b/docs/MonoLoco++.md
@@ -1,8 +1,6 @@
 # Perceiving Humans: from Monocular 3D Localization to Social Distancing
 
-![social distancing](docs/social_distancing.jpg)
-
 > Perceiving humans in the context of Intelligent Transportation Systems (ITS) often
 relies on multiple cameras or expensive LiDAR sensors. In this work, we present a new cost-
 effective vision-based method that perceives humans’ locations in 3D
@@ -18,8 +16,21 @@ Our vision-based system (i) is privacy-safe, (ii) works with any fixed or moving
 Indeed, we show that we can rethink the concept of “social distancing” as a form of
 social interaction in contrast to a simple location-based rule.
 We publicly share the source code towards an open science mission.
 
+```
+@InProceedings{bertoni_social,
+author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
+title = {Perceiving Humans: from Monocular 3D Localization to Social Distancing},
+booktitle = {arXiv:2009.00984},
+month = {September},
+year = {2020}
+}
+```
+![social distancing](social_distancing.jpg)
+
 ## Predictions
-For a quick setup download a pifpaf and a MonoLoco++ models from TODO and save them into `data/models`.
+For a quick setup, download a pifpaf model and a MonoLoco++ model from
+[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
+and save them into `data/models`.
 
 ### 3D Localization
 The predict script receives an image (or an entire folder using glob expressions),
@@ -161,12 +172,12 @@ For a more extensive list of available parameters, run:
 We provide evaluation on KITTI for models trained on nuScenes or KITTI.
 We compare them with other monocular
 and stereo Baselines:
-[MonoLoco](TODO),
+[MonoLoco](https://github.com/vita-epfl/monoloco),
 [Mono3D](https://www.cs.toronto.edu/~urtasun/publications/chen_etal_cvpr16.pdf),
 [3DOP](https://xiaozhichen.github.io/papers/nips15chen.pdf),
 [MonoDepth](https://arxiv.org/abs/1609.03677)
-[MonoPSR](TODO) and our
-[MonoDIS](TODO) and our
+[MonoPSR](https://github.com/kujason/monopsr),
+[MonoDIS](https://research.mapillary.com/img/publications/MonoDIS.pdf), and our
 [Geometrical Baseline](monoloco/eval/geom_baseline.py).
 
 * **Mono3D**: download validation files from [here](http://3dimage.ee.tsinghua.edu.cn/cxz/mono3d)
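For reviewers following the documentation changes above against the code changes below, here is a minimal sketch of driving MonoLoco++ from Python on pre-computed pifpaf annotations. It only uses calls that appear verbatim in this patch (`Loco`, `factory_for_gt`, `preprocess_pifpaf`, `open_annotations`, `forward`, `post_process`); the checkpoint filename, image size, and dropout values are hypothetical placeholders, not part of the patch:

```
# Sketch only: names mirror the calls visible in this patch; the checkpoint
# path and image size are hypothetical placeholders.
import torch

from monstereo.network import Loco
from monstereo.network.process import factory_for_gt, preprocess_pifpaf
from monstereo.utils import open_annotations

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Loco(model='data/models/monoloco_pp.pkl',  # hypothetical file name
           net='monoloco_pp', device=device, n_dropout=0, p_dropout=0.2)

im_size = (1920.0, 1080.0)  # (width, height) of the original image
kk, dic_gt = factory_for_gt(im_size, name='example.png', path_gt=None)

# Poses previously exported by pifpaf as <image><ext>.pifpaf.json
annotations = open_annotations('example.png.pifpaf.json')
boxes, keypoints = preprocess_pifpaf(annotations, im_size, enlarge_boxes=False)
dic_out = net.forward(keypoints, kk)
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
print(dic_out.keys())  # 3D localization outputs
```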
diff --git a/monstereo/activity.py b/monstereo/activity.py
index 7fe9af2..468737d 100644
--- a/monstereo/activity.py
+++ b/monstereo/activity.py
@@ -2,24 +2,16 @@
 # pylint: disable=too-many-statements
 
 import math
-import glob
-import os
 import copy
 from contextlib import contextmanager
 
 import numpy as np
 import torch
-import torch.nn.functional as F
-import torchvision
 import matplotlib.pyplot as plt
 from matplotlib.patches import Circle, FancyArrow
-from PIL import Image
 
 from .network.process import laplace_sampling
-from .utils import open_annotations, get_task_error
 from .visuals.pifpaf_show import KeypointPainter, image_canvas
-from .network import Loco
-from .network.process import factory_for_gt, preprocess_pifpaf
 
 
 def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,
@@ -113,101 +105,6 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False
     return False
 
 
-def predict(args):
-
-    cnt = 0
-    args.device = torch.device('cpu')
-    if torch.cuda.is_available():
-        args.device = torch.device('cuda')
-
-    # Load data and model
-    monoloco = Loco(model=args.model, net='monoloco_pp',
-                    device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    images = []
-    images += glob.glob(args.glob)  # from cli as a string or linux converts
-
-    # Option 1: Run PifPaf extract poses and run MonoLoco in a single forward pass
-    if args.json_dir is None:
-        from .network import PifPaf, ImageList
-        pifpaf = PifPaf(args)
-        data = ImageList(args.images, scale=args.scale)
-        data_loader = torch.utils.data.DataLoader(
-            data, batch_size=1, shuffle=False,
-            pin_memory=args.pin_memory, num_workers=args.loader_workers)
-
-        for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
-            images = image_tensors.permute(0, 2, 3, 1)
-
-            processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-            fields_batch = pifpaf.fields(processed_images)
-
-            # unbatch
-            for image_path, image, processed_image_cpu, fields in zip(
-                    image_paths, images, processed_images_cpu, fields_batch):
-
-                if args.output_directory is None:
-                    output_path = image_path
-                else:
-                    file_name = os.path.basename(image_path)
-                    output_path = os.path.join(args.output_directory, file_name)
-                im_size = (float(image.size()[1] / args.scale),
-                           float(image.size()[0] / args.scale))
-
-                print('image', idx, image_path, output_path)
-
-                _, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
-
-                kk, dic_gt = factory_for_gt(im_size, name=image_path, path_gt=args.path_gt)
-                image_t = image  # Resized tensor
-
-                # Run Monoloco
-                boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size, enlarge_boxes=False)
-                dic_out = monoloco.forward(keypoints, kk)
-                dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
-
-                # Print
-                show_social(args, image_t, output_path, pifpaf_out, dic_out)
-
-                print('Image {}\n'.format(cnt) + '-' * 120)
-                cnt += 1
-
-    # Option 2: Load json file of poses from PifPaf and run monoloco
-    else:
-        for idx, im_path in enumerate(images):
-
-            # Load image
-            with open(im_path, 'rb') as f:
-                image = Image.open(f).convert('RGB')
-            if args.output_directory is None:
-                output_path = im_path
-            else:
-                file_name = os.path.basename(im_path)
-                output_path = os.path.join(args.output_directory, file_name)
-
-            im_size = (float(image.size[0] / args.scale),
-                       float(image.size[1] / args.scale))  # Width, Height (original)
-            kk, dic_gt = factory_for_gt(im_size, name=im_path, path_gt=args.path_gt)
-            image_t = torchvision.transforms.functional.to_tensor(image).permute(1, 2, 0)
-
-            # Load json
-            basename, ext = os.path.splitext(os.path.basename(im_path))
-
-            extension = ext + '.pifpaf.json'
-            path_json = os.path.join(args.json_dir, basename + extension)
-            annotations = open_annotations(path_json)
-
-            # Run Monoloco
-            boxes, keypoints = preprocess_pifpaf(annotations, im_size, enlarge_boxes=False)
-            dic_out = monoloco.forward(keypoints, kk)
-            dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
-            if args.social_distance:
-                show_social(args, image, output_path, annotations, dic_out)
-
-            print('Image {}\n'.format(cnt) + '-' * 120)
-            cnt += 1
-
-
 def show_social(args, image_t, output_path, annotations, dic_out):
     """Output frontal image with poses or combined with bird eye view"""
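With the duplicated `predict()` removed, `activity.py` keeps only the analysis helpers, `social_interactions` and `check_f_formations`. A rough usage sketch for the former follows; it is built on assumptions the diff does not state (that `centers` are 2D positions in meters, `angles` orientations in radians, `dds` predicted distances, and that the function returns a boolean):

```
# Hypothetical scene: three detected people (units assumed meters / radians).
from monstereo.activity import social_interactions

centers = [[0.0, 2.0], [0.5, 2.3], [5.0, 9.0]]
angles = [0.1, 3.0, 1.5]
dds = [2.0, 2.4, 10.3]

for idx in range(len(centers)):
    # social_distance flag as exposed in the signature above
    flag = social_interactions(idx, centers, angles, dds, social_distance=True)
    print('person', idx, '-> too close:', flag)
```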
diff --git a/monstereo/predict.py b/monstereo/predict.py
index 676e183..d8d230d 100644
--- a/monstereo/predict.py
+++ b/monstereo/predict.py
@@ -17,7 +17,6 @@
 from openpifpaf.predict import processor_factory, preprocess_factory
 from openpifpaf import decoder, network, visualizer, show
 
 from .visuals.printer import Printer
-from .visuals.pifpaf_show import KeypointPainter
 from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
 from .activity import show_social
@@ -137,7 +136,6 @@ def predict(args):
         dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=reorder)
 
         if args.social_distance:
-            # image_t = torchvision.transforms.functional.to_tensor(image).permute(1, 2, 0)
             show_social(args, cpu_image, output_path, pifpaf_out, dic_out)
 
         else:
@@ -151,13 +149,13 @@
         kk = None
 
         if not args.social_distance:
-            factory_outputs(args, annotation_painter, cpu_image, output_path, pifpaf_outputs, pifpaf_out,
+            factory_outputs(args, annotation_painter, cpu_image, output_path, pifpaf_outputs,
                             dic_out=dic_out, kk=kk)
 
         print('Image {}\n'.format(cnt) + '-' * 120)
         cnt += 1
 
 
-def factory_outputs(args, annotation_painter, cpu_image, output_path, pred, pifpaf_out, dic_out=None, kk=None):
+def factory_outputs(args, annotation_painter, cpu_image, output_path, pred, dic_out=None, kk=None):
     """Output json files or images according to the choice"""
 
     # Save json file
@@ -165,31 +163,6 @@ def factory_outputs(args, annotation_painter, cpu_image, output_path, pred, pifp
         with openpifpaf.show.image_canvas(cpu_image, output_path) as ax:
             annotation_painter.annotations(ax, pred)
 
-        # Visualizer
-        keypoint_painter = KeypointPainter(show_box=False)
-        skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4)
-
-        if 'json' in args.output_types and len(pred) > 0:
-            with open(output_path + '.pifpaf.json', 'w') as f:
-                json.dump(pifpaf_out, f)
-
-        # if 'keypoints' in args.output_types:
-        #     with image_canvas(images_outputs[0],
-        #                       output_path + '.keypoints.png',
-        #                       show=args.show,
-        #                       fig_width=args.figure_width,
-        #                       dpi_factor=args.dpi_factor) as ax:
-        #         keypoint_painter.keypoints(ax, keypoint_sets)
-        #
-        # if 'skeleton' in args.output_types:
-        #     with image_canvas(images_outputs[0],
-        #                       output_path + '.skeleton.png',
-        #                       show=args.show,
-        #                       fig_width=args.figure_width,
-        #                       dpi_factor=args.dpi_factor) as ax:
-        #         skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
-
-
     else:
         if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
             print(output_path)
             if dic_out['boxes']:  # Only print in case of detections
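Read linearly, the per-image routing in `predict()` after these hunks reduces to the sketch below. It is assembled only from lines visible in the hunks above; the surrounding context (and the exact nesting of the two call sites, which appear in separate hunks) is elided:

```
# Condensed sketch of the per-image routing after this patch (not verbatim code).
if args.social_distance:
    # frontal image combined with bird-eye view, via activity.show_social
    show_social(args, cpu_image, output_path, pifpaf_out, dic_out)
else:
    # json files or front/bird/multi figures; pifpaf_out is no longer passed
    factory_outputs(args, annotation_painter, cpu_image, output_path,
                    pifpaf_outputs, dic_out=dic_out, kk=kk)
```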
diff --git a/monstereo/predict_old.py b/monstereo/predict_old.py
deleted file mode 100644
index d869386..0000000
--- a/monstereo/predict_old.py
+++ /dev/null
@@ -1,146 +0,0 @@
-
-# pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable
-
-import os
-import json
-from collections import defaultdict
-
-
-import torch
-from PIL import Image
-
-from .visuals.printer import Printer
-from .visuals.pifpaf_show import KeypointPainter, image_canvas
-from .network import PifPaf, ImageList, Loco
-from .network.process import factory_for_gt, preprocess_pifpaf
-
-
-def predict(args):
-
-    cnt = 0
-
-    # Load Models
-    pifpaf = PifPaf(args)
-    assert args.mode in ('mono', 'stereo', 'pifpaf')
-
-    if 'mono' in args.mode:
-        monoloco = Loco(model=args.model, net='monoloco_pp',
-                        device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    if 'stereo' in args.mode:
-        monstereo = Loco(model=args.model, net='monstereo',
-                         device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    # data
-    data = ImageList(args.images, scale=args.scale)
-    if args.mode == 'stereo':
-        assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
-        bs = 2
-    else:
-        bs = 1
-    data_loader = torch.utils.data.DataLoader(
-        data, batch_size=bs, shuffle=False,
-        pin_memory=args.pin_memory, num_workers=args.loader_workers)
-
-    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
-        images = image_tensors.permute(0, 2, 3, 1)
-
-        processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-        fields_batch = pifpaf.fields(processed_images)
-
-        # unbatch stereo pair
-        for ii, (image_path, image, processed_image_cpu, fields) in enumerate(zip(
-                image_paths, images, processed_images_cpu, fields_batch)):
-
-            if args.output_directory is None:
-                splits = os.path.split(image_paths[0])
-                output_path = os.path.join(splits[0], 'out_' + splits[1])
-            else:
-                file_name = os.path.basename(image_paths[0])
-                output_path = os.path.join(args.output_directory, 'out_' + file_name)
-            print('image', idx, image_path, output_path)
-            keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
-
-            if ii == 0:
-                pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
-                images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor and monoloco original image
-                pifpaf_outs = {'left': pifpaf_out}
-                image_path_l = image_path
-            else:
-                pifpaf_outs['right'] = pifpaf_out
-
-            if args.mode in ('stereo', 'mono'):
-                # Extract calibration matrix and ground truth file if present
-                with open(image_path_l, 'rb') as f:
-                    pil_image = Image.open(f).convert('RGB')
-                images_outputs.append(pil_image)
-
-                im_name = os.path.basename(image_path_l)
-                im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale))  # Original
-                kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
-
-                # Preprocess pifpaf outputs and run monoloco
-                boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False)
-
-                if args.mode == 'mono':
-                    print("Prediction with MonoLoco++")
-                    dic_out = monoloco.forward(keypoints, kk)
-                    dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-
-                else:
-                    print("Prediction with MonStereo")
-                    boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
-                    dic_out = monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
-                    dic_out = monstereo.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-
-            else:
-                dic_out = defaultdict(list)
-                kk = None
-
-            factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
-            print('Image {}\n'.format(cnt) + '-' * 120)
-            cnt += 1
-
-
-def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
-    """Output json files or images according to the choice"""
-
-    # Save json file
-    if args.mode == 'pifpaf':
-        keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
-
-        # Visualizer
-        keypoint_painter = KeypointPainter(show_box=False)
-        skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4)
-
-        if 'json' in args.output_types and keypoint_sets.size > 0:
-            with open(output_path + '.pifpaf.json', 'w') as f:
-                json.dump(pifpaf_out, f)
-
-        if 'keypoints' in args.output_types:
-            with image_canvas(images_outputs[0],
-                              output_path + '.keypoints.png',
-                              show=args.show,
-                              fig_width=args.figure_width,
-                              dpi_factor=args.dpi_factor) as ax:
-                keypoint_painter.keypoints(ax, keypoint_sets)
-
-        if 'skeleton' in args.output_types:
-            with image_canvas(images_outputs[0],
-                              output_path + '.skeleton.png',
-                              show=args.show,
-                              fig_width=args.figure_width,
-                              dpi_factor=args.dpi_factor) as ax:
-                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
-
-    else:
-        if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
-            print(output_path)
-            if dic_out['boxes']:  # Only print in case of detections
-                printer = Printer(images_outputs[1], output_path, kk, args)
-                figures, axes = printer.factory_axes()
-                printer.draw(figures, axes, dic_out, images_outputs[1])
-
-        if 'json' in args.output_types:
-            with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
-                json.dump(dic_out, ff)
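The deleted `predict_old.py` was the last caller of the wrapper-based `PifPaf`/`ImageList` API. One detail of the removed code worth keeping in mind for the new openpifpaf-based path is its stereo convention: images were consumed in consecutive left/right pairs (hence `batch_size=2` and the even-count assertion). A standalone sketch of that pairing invariant, with hypothetical file names:

```
# Sketch: the left/right pairing rule enforced by the deleted predict_old.py.
image_paths = ['0001_L.png', '0001_R.png', '0002_L.png', '0002_R.png']  # hypothetical
assert len(image_paths) % 2 == 0, "Odd number of images in a stereo setting"

# batch_size=2 in the old loader meant consecutive images formed (left, right) pairs
pairs = list(zip(image_paths[0::2], image_paths[1::2]))
for left, right in pairs:
    print('left:', left, '| right:', right)
```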