diff --git a/.pylintrc b/.pylintrc
index bce16c1..dc2e7fa 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -9,7 +9,7 @@
Good-names=xx,dd,zz,hh,ww,pp,kk,lr,w1,w2,w3,mm,im,uv,ax,COV_MIN,CONF_MIN

[TYPECHECK]
-disable=import-error,invalid-name,unused-variable,fixme,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation
+disable=import-error,invalid-name,unused-variable,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation,import-outside-toplevel

# List of members which are set dynamically and missed by pylint inference
diff --git a/LICENSE b/LICENSE
index 40f5c41..8ddfc99 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2020 by EPFL/VITA. All rights reserved.
+Copyright 2020-2021 by EPFL/VITA. All rights reserved.
This project and all its files are licensed under
GNU AGPLv3 or later version.
@@ -6,4 +6,4 @@
If this license is not suitable for your business or project
please contact EPFL-TTO (https://tto.epfl.ch/) for a full commercial license.
-This software may not be used to harm any person deliberately.
+This software may not be used to harm any person deliberately or for any military application.
diff --git a/README.md b/README.md
index c3529c3..d780e15 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,61 @@
# Perceiving Humans in 3D

-This repository contains the code for three research projects:
-
-1. **MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization**
-[README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonStereo.md) & [Article](https://arxiv.org/abs/2008.10913)
-
-    ![monstereo](docs/out_005523.png)
-
-2. **Perceiving Humans: from Monocular 3D Localization to Social Distancing**
+This repository contains the code for two research projects:
+
+1. **Perceiving Humans: from Monocular 3D Localization to Social Distancing (MonoLoco++)**
[README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonoLoco%2B%2B.md) & [Article](https://arxiv.org/abs/2009.00984)

-    ![social distancing](docs/pull_sd.png)
+    ![social distancing](docs/social_distancing.jpg)

-3. **MonoLoco: Monocular 3D Pedestrian Localization and Uncertainty Estimation** (Improved!)
-[README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonoLoco.md) & [Article](https://arxiv.org/abs/1906.06059) & [Original Repo](https://github.com/vita-epfl/monoloco)
+    ![monoloco_pp](docs/truck.jpg)
+
+
+2. **MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization**
+[README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonStereo.md) & [Article](https://arxiv.org/abs/2008.10913)
+
+    ![monstereo 1](docs/000840_multi.jpg)

-    ![monoloco](docs/truck.png)
+Both projects have been built upon the CVPR'19 project [Openpifpaf](https://github.com/vita-epfl/openpifpaf)
+for 2D pose estimation and the ICCV'19 project [MonoLoco](https://github.com/vita-epfl/monoloco) for monocular 3D localization.
+All projects share the AGPL license.
+
+# Setup
+Installation steps are the same for both projects.
+
+### Install
+The installation has been tested on OSX and Linux operating systems, with Python 3.6 or Python 3.7.
+Packages have been installed with pip inside virtual environments.
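+As an illustration (not required by the project), a clean virtual environment can be created with:
+
+```
+python3 -m venv venv
+source venv/bin/activate
+pip3 install --upgrade pip
+```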
+For quick installation, do not clone this repository,
+and make sure there is no folder named monstereo in your current directory.
+A GPU is not required, yet highly recommended for real-time performance.
+MonStereo can be installed as a package with:
+
+```
+pip3 install monstereo
+```
+
+For development of the monstereo source code itself, you need to clone this repository and then:
+```
+pip3 install sdist
+cd monstereo
+python3 setup.py sdist bdist_wheel
+pip3 install -e .
+```
+
+### Interfaces
+All the commands are run through a main file called `run.py` using subparsers.
+To check all the commands for the parser and the subparsers (including openpifpaf ones) run:
+
+* `python3 -m monstereo.run --help`
+* `python3 -m monstereo.run predict --help`
+* `python3 -m monstereo.run train --help`
+* `python3 -m monstereo.run eval --help`
+* `python3 -m monstereo.run prep --help`
+
+or check the file `monstereo/run.py`
+
+Further instructions for prediction, preprocessing, training and evaluation can be found here:
+
+* [MonStereo README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonStereo.md)
+* [MonoLoco++ README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonoLoco%2B%2B.md)
diff --git a/docs/000840_multi.jpg b/docs/000840_multi.jpg
new file mode 100644
index 0000000..fd6676a
Binary files /dev/null and b/docs/000840_multi.jpg differ
diff --git a/docs/002282.png b/docs/002282.png
new file mode 100755
index 0000000..e81fe3b
Binary files /dev/null and b/docs/002282.png differ
diff --git a/docs/MonStereo.md b/docs/MonStereo.md
index 5896b1a..5e74893 100644
--- a/docs/MonStereo.md
+++ b/docs/MonStereo.md
@@ -24,53 +24,15 @@
month = {August},
year = {2020}
}
```
+
+# Prediction
+The predict script receives an image (or an entire folder using glob expressions),
+calls PifPaf for 2D human pose detection over the image
+and runs MonStereo for 3D localization of the detected poses.

-# Features
-The code has been built upon the ICCV'19 project [MonoLoco](https://github.com/vita-epfl/monoloco).
-This repository supports
+Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
+*birds-eye-view mode* or *multi mode* and can be specified with `--output_types`

-* the original MonoLoco
-* An improved Monocular version (MonoLoco++) for x,y,z coordinates, orientation, and dimensions
-* MonStereo
-
-# Setup
-
-### Install
-The installation has been tested on OSX and Linux operating systems, with Python 3.6 or Python 3.7.
-Packages have been installed with pip and virtual environments.
-For quick installation, do not clone this repository,
-and make sure there is no folder named monstereo in your current directory.
-A GPU is not required, yet highly recommended for real-time performances.
-MonStereo can be installed as a package, by:
-
-```
-pip3 install monstereo
-```
-
-For development of the monstereo source code itself, you need to clone this repository and then:
-```
-pip3 install sdist
-cd monstereo
-python3 setup.py sdist bdist_wheel
-pip3 install -e .
-```
-
-### Data structure
-
-    Data
-    ├── arrays
-    ├── models
-    ├── kitti
-    ├── logs
-    ├── output
-
-
-Run the following to create the folders:
-```
-mkdir data
-cd data
-mkdir arrays models kitti logs output
-```

### Pre-trained Models
* Download Monstereo pre-trained model from
Alternatively, you can download a Pifpaf pre-trained model from [openpifpaf](https://github.com/vita-epfl/openpifpaf).
If you'd like to use an updated version, we suggest to re-train the MonStereo model as well.
* The model for the experiments is provided in *data/models/ms-200710-1511.pkl*

-# Interfaces
-All the commands are run through a main file called `main.py` using subparsers.
-To check all the commands for the parser and the subparsers (including openpifpaf ones) run:
-
-* `python3 -m monstereo.run --help`
-* `python3 -m monstereo.run predict --help`
-* `python3 -m monstereo.run train --help`
-* `python3 -m monstereo.run eval --help`
-* `python3 -m monstereo.run prep --help`
-
-or check the file `monstereo/run.py`
-
-# Prediction
-The predict script receives an image (or an entire folder using glob expressions),
-calls PifPaf for 2d human pose detection over the image
-and runs MonStereo for 3d location of the detected poses.
-
-
-Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
-*birds-eye-view mode* or *multi mode* and can be specified with `--output_types`
-

### Ground truth matching
* In case you provide a ground-truth json file to compare the predictions of MonSter,

@@ -125,13 +66,13 @@
After downloading model and ground-truth file, a demo can be tested with the following commands:

`python3 -m monstereo.run predict --glob docs/000840*.png --output_types multi --scale 2
--model data/models/ms-200710-1511.pkl --z_max 30 --checkpoint resnet152 --path_gt data/arrays/names-kitti-200615-1022.json
-o data/output`

-![Crowded scene](out_000840.png)
+![Crowded scene](out_000840.jpg)

`python3 -m monstereo.run predict --glob docs/005523*.png --output_types multi --scale 2
--model data/models/ms-200710-1511.pkl --z_max 30 --checkpoint resnet152 --path_gt data/arrays/names-kitti-200615-1022.json
-o data/output`

-![Occluded hard example](out_005523.png)
+![Occluded hard example](out_005523.jpg)

# Preprocessing
Preprocessing and training step are already fully supported by the code provided,
@@ -139,6 +80,22 @@
but require first to run a pose detector over all the training images
and collect the annotations. The code supports this option (by running the predict script and using `--mode pifpaf`).

+### Data structure
+
+    Data
+    ├── arrays
+    ├── models
+    ├── kitti
+    ├── logs
+    ├── output
+
+Run the following to create the folders:
+```
+mkdir data
+cd data
+mkdir arrays models kitti logs output
+```
+

### Datasets

Download KITTI ground truth files and camera calibration matrices for training
@@ -149,13 +106,20 @@
data/kitti/images`

### Annotations to preprocess
-MonStereo is trained using 2D human pose joints. To create them run pifaf over KITTI training images.
-You can create them running the predict script and using `--mode pifpaf`.
+MonStereo is trained using 2D human pose joints. To obtain the joints, the first step is to run
+pifpaf over the KITTI training images, either by running the predict script with `--mode pifpaf`,
+or by using the pifpaf code directly.
+The MonStereo preprocessing script expects annotations from left and right images in two different folders,
+whose paths differ only by the suffix `_right` for the "right" folder,
+for example `data/annotations` and `data/annotations_right`.
+Do not change the names of the json files created by pifpaf: for each left annotation,
+the code will look for the corresponding right annotation.

### Inputs joints for training
MonoStereo is trained using 2D human pose joints matched with the ground truth location provided by
KITTI Dataset. To create the joints run: `python3 -m monstereo.run prep` specifying:
-1. `--dir_ann` annotation directory containing Pifpaf joints of KITTI.
+
+`--dir_ann` annotation directory containing Pifpaf joints of KITTI for the left images.
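+
+For instance, assuming the left-image annotations were saved in `data/annotations` as described above (the path is illustrative):
+```
+python3 -m monstereo.run prep --dir_ann data/annotations
+```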
### Ground truth file for evaluation

@@ -165,7 +129,7 @@
by the image name to easily access ground truth files for evaluation and predict

# Training
Provide the json file containing the preprocess joints as argument.
-As simple as `python3 -m monstereo.run train --joints <json file path>`
+As simple as `python3 -m monstereo.run train --joints <json file path>`
All the hyperparameters options can be checked at `python3 -m monstereo.run train --help`.

# Evaluation (KITTI Dataset)
diff --git a/docs/MonoLoco++.md b/docs/MonoLoco++.md
new file mode 100644
index 0000000..9c867fb
--- /dev/null
+++ b/docs/MonoLoco++.md
@@ -0,0 +1,216 @@
+
+# Perceiving Humans: from Monocular 3D Localization to Social Distancing
+
+> Perceiving humans in the context of Intelligent Transportation Systems (ITS)
+often relies on multiple cameras or expensive LiDAR sensors.
+In this work, we present a new cost-effective vision-based method that perceives humans’ locations in 3D
+and their body orientation from a single image.
+We address the challenges related to the ill-posed monocular 3D tasks by proposing a deep learning method
+that predicts confidence intervals in contrast to point estimates. Our neural network architecture estimates
+humans’ 3D body locations and their orientation with a measure of uncertainty.
+Our vision-based system (i) is privacy-safe, (ii) works with any fixed or moving cameras,
+and (iii) does not rely on ground plane estimation.
+We demonstrate the performance of our method with respect to three applications:
+locating humans in 3D, detecting social interactions,
+and verifying the compliance of recent safety measures due to the COVID-19 outbreak.
+Indeed, we show that we can rethink the concept of “social distancing” as a form of social interaction
+in contrast to a simple location-based rule. We publicly share the source code towards an open science mission.
+
+```
+@InProceedings{bertoni_social,
+author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
+title={Perceiving Humans: from Monocular 3D Localization to Social Distancing},
+booktitle = {arXiv:2009.00984},
+month = {September},
+year = {2020}
+}
+```
+![social distancing](social_distancing.jpg)
+
+## Predictions
+For a quick setup, download a pifpaf and a MonoLoco++ model from
+[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
+and save them into `data/models`.
+
+### 3D Localization
+The predict script receives an image (or an entire folder using glob expressions),
+calls PifPaf for 2D human pose detection over the image
+and runs MonoLoco++ for 3D localization of the detected poses.
+The `--net` argument selects whether pifpaf, MonoLoco++ or MonStereo outputs are saved.
+You can check all commands for Pifpaf at [openpifpaf](https://github.com/vita-epfl/openpifpaf).
+
+Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
+*birds-eye-view mode* or *combined mode* and can be specified with `--output_types`
+
+Ground-truth KITTI files for comparing results can be downloaded from
+[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
+(file called *names-kitti*) and should be saved into `data/arrays`.
+Ground-truth files can also be generated; more info in the preprocessing section.
+
+For an example image, run the following command:
+
+```
+python -m monstereo.run predict \
+docs/002282.png \
+--net monoloco_pp \
+--output_types multi \
+--model data/models/monoloco_pp-201203-1424.pkl \
+--path_gt data/arrays/names-kitti-200615-1022.json \
+-o <output directory> \
+--long-edge <long edge in pixels> \
+--n_dropout <50 to include epistemic uncertainty, 0 otherwise>
+```
+
+![predict](out_002282.png.multi.jpg)
+
+To show all the instances estimated by MonoLoco++, add the argument `--show_all` to the above command.
+
+![predict_all](out_002282.png.multi_all.jpg)
+
+### Social Distancing
+To visualize social distancing compliance, simply add the argument `--social_distance` to the predict command.
+An example from the Collective Activity Dataset is provided below.
+
+To visualize social distancing, run the following command:
+```
+python -m monstereo.run predict \
+docs/frame0038.jpg \
+--net monoloco_pp \
+--social_distance \
+--output_types front bird --show_all \
+--model data/models/monoloco_pp-201203-1424.pkl -o <output directory>
+```
+
+![social distancing front](out_frame0038.jpg.front.png)
+
+![social distancing bird](out_frame0038.jpg.bird.png)
+
+Threshold distance and radii (for F-formations) can be set using `--threshold-dist` and `--radii`, respectively.
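+
+As a usage sketch (the numeric values are illustrative, not the defaults), the same command with explicit thresholds becomes:
+```
+python -m monstereo.run predict \
+docs/frame0038.jpg \
+--net monoloco_pp \
+--social_distance \
+--threshold-dist 2.5 --radii 0.3 0.5 \
+--output_types front bird --show_all \
+--model data/models/monoloco_pp-201203-1424.pkl -o <output directory>
+```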
+
+For more info, run:
+
+`python -m monstereo.run predict --help`
+
+### Orientation and Bounding Box dimensions
+MonoLoco++ estimates orientation and box dimensions as well. Results are saved in a json file when using the command
+`--output_types json`. At the moment, the only visualization including orientation is the social distancing one.
+
+## Preprocessing
+
+### Kitti
+Annotations from a pose detector need to be stored in a folder,
+for example by using [openpifpaf](https://github.com/vita-epfl/openpifpaf):
+```
+python -m openpifpaf.predict \
+--glob "<image directory>/*.png" \
+--json-output <annotation directory> \
+--checkpoint=shufflenetv2k30 \
+--instance-threshold=0.05 --seed-threshold 0.05 --force-complete-pose
+```
+Once the step is complete:
+`python -m monstereo.run prep --dir_ann <annotation directory> --monocular`
+
+
+### Collective Activity Dataset
+To evaluate on the [Collective Activity Dataset](http://vhosts.eecs.umich.edu/vision//activity-dataset.html)
+(without any training) we selected 6 scenes that contain people talking to each other.
+This allows for a balanced dataset, but any other configuration will work.
+
+The expected structure for the dataset is the following:
+
+    collective_activity
+    ├── images
+    ├── annotations
+
+where the images and annotations inside follow this naming convention:
+
+IMAGES: seqXX_frameYYYY.jpg
+ANNOTATIONS: seqXX_annotations.txt
+
+Compared to the original dataset, the images and annotations are moved into a single folder each,
+and the sequence name is added to their file names. One command to do this is:
+
+`rename -v -n 's/frame/seq14_frame/' f*.jpg`
+
+which, for example, renames all the jpg images in that folder by adding the sequence number
+(remove `-n`, which makes it a dry run, after checking that it works).
+
+Pifpaf annotations should also be saved in a single folder and can be created with:
+
+```
+python -m openpifpaf.predict \
+--glob "data/collective_activity/images/*.jpg" \
+--checkpoint=shufflenetv2k30 \
+--instance-threshold=0.05 --seed-threshold 0.05 --force-complete-pose \
+--json-output /data/lorenzo-data/annotations/collective_activity/v012
+```
+
+Finally, to evaluate activity using a MonoLoco++ model pre-trained on either nuScenes or KITTI:
+```
+python -m monstereo.run eval --activity \
+--net monoloco_pp --dataset collective \
+--model <model path> --dir_ann <annotation directory>
+```
+
+## Training
+We train on the KITTI or nuScenes datasets, specifying the path of the input joints.
+
+Our results are obtained with:
+
+`python -m monstereo.run train --lr 0.001 --joints data/arrays/joints-kitti-201202-1743.json --save --monocular`
+
+For a more extensive list of available parameters, run:
+
+`python -m monstereo.run train --help`
+
+## Evaluation
+
+### 3D Localization
+We provide evaluation on KITTI for models trained on nuScenes or KITTI. We compare them with other monocular
+and stereo baselines:
+
+[MonoLoco](https://github.com/vita-epfl/monoloco),
+[Mono3D](https://www.cs.toronto.edu/~urtasun/publications/chen_etal_cvpr16.pdf),
+[3DOP](https://xiaozhichen.github.io/papers/nips15chen.pdf),
+[MonoDepth](https://arxiv.org/abs/1609.03677),
+[MonoPSR](https://github.com/kujason/monopsr),
+[MonoDIS](https://research.mapillary.com/img/publications/MonoDIS.pdf) and our
+[Geometrical Baseline](monoloco/eval/geom_baseline.py).
+
+* **Mono3D**: download validation files from [here](http://3dimage.ee.tsinghua.edu.cn/cxz/mono3d)
+and save them into `data/kitti/m3d`
+* **3DOP**: download validation files from [here](https://xiaozhichen.github.io/)
+and save them into `data/kitti/3dop`
+* **MonoDepth**: compute an average depth for every instance using the following script
+[here](https://github.com/Parrotlife/pedestrianDepth-baseline/tree/master/MonoDepth-PyTorch)
+and save them into `data/kitti/monodepth`
+* **GeometricalBaseline**: a geometrical baseline comparison is provided.
+
+To include the MonoLoco baseline, also download the MonoLoco model.
+
+The average geometrical value for comparison can be obtained running:
+```
+python -m monstereo.run eval \
+--dir_ann <annotation directory> \
+--model <model path> \
+--net monoloco_pp \
+--generate
+```
+
+To also include the geometric baselines and MonoLoco, add the flag `--baselines`
+
+### Activity Estimation (Talking)
+Please follow the preprocessing steps for the Collective Activity Dataset and run pifpaf over the dataset images.
+Evaluation on this dataset is done with models trained on either KITTI or nuScenes.
+For optimal performance, we suggest the model trained on the nuScenes teaser (TODO add link)
+```
+python -m monstereo.run eval \
+--activity \
+--dataset collective \
+--net monoloco_pp \
+--model <model path> \
+--dir_ann <annotation directory>
+```
diff --git a/docs/MonoLoco.md b/docs/MonoLoco.md
deleted file mode 100644
index e48e5e0..0000000
--- a/docs/MonoLoco.md
+++ /dev/null
@@ -1,14 +0,0 @@
-
-### Work in Progress
-
-For the moment please refer to the [original repository](https://github.com/vita-epfl/monoloco)
-
-```
-@InProceedings{bertoni_perceiving,
-author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
-title = {Perceiving Humans: from Monocular 3D Localization to Social Distancing},
-booktitle = {arXiv:2009.00984},
-month = {September},
-year = {2020}
-}
-```
\ No newline at end of file
diff --git a/docs/frame0038.jpg b/docs/frame0038.jpg
new file mode 100644
index 0000000..7050b2c
Binary files /dev/null and b/docs/frame0038.jpg differ
diff --git a/docs/out_000840.jpg b/docs/out_000840.jpg
new file mode 100644
index 0000000..621553b
Binary files /dev/null and b/docs/out_000840.jpg differ
diff --git a/docs/out_000840.png b/docs/out_000840.png
deleted file mode 100644
index 77eefa7..0000000
Binary files a/docs/out_000840.png and /dev/null differ
diff --git a/docs/out_002282.png.multi.jpg b/docs/out_002282.png.multi.jpg
new file mode 100644
index 0000000..8217b35
Binary files /dev/null and b/docs/out_002282.png.multi.jpg differ
diff --git a/docs/out_002282.png.multi_all.jpg b/docs/out_002282.png.multi_all.jpg
new file mode 100644
index 0000000..08722fd
Binary files /dev/null and b/docs/out_002282.png.multi_all.jpg differ
diff --git a/docs/out_005523.jpg b/docs/out_005523.jpg
new file mode 100644
index 0000000..5488f35
Binary files /dev/null and b/docs/out_005523.jpg differ
diff --git a/docs/out_005523.png b/docs/out_005523.png
deleted file mode 100644
index 9299ef9..0000000
Binary files a/docs/out_005523.png and /dev/null differ
diff --git a/docs/out_frame0038.jpg.bird.png b/docs/out_frame0038.jpg.bird.png
new file mode 100644
index 0000000..19cdca5
Binary files /dev/null and b/docs/out_frame0038.jpg.bird.png differ
diff --git a/docs/out_frame0038.jpg.front.png b/docs/out_frame0038.jpg.front.png
new file mode 100644
index 0000000..e64071f
Binary files /dev/null and b/docs/out_frame0038.jpg.front.png differ
diff --git a/docs/pull_sd.png b/docs/pull_sd.png
deleted file mode 100644
index 8cc8301..0000000
Binary files a/docs/pull_sd.png and /dev/null differ
diff --git a/docs/social_distancing.jpg b/docs/social_distancing.jpg
new file mode 100644
index 0000000..16fecfa
Binary files /dev/null and b/docs/social_distancing.jpg differ
diff --git a/docs/truck.jpg b/docs/truck.jpg
new file mode 100644
index 0000000..7d1b3af
Binary files /dev/null and b/docs/truck.jpg differ
diff --git a/docs/truck.png b/docs/truck.png
deleted file mode 100644
index f77d8e2..0000000
Binary files a/docs/truck.png and /dev/null differ
diff --git a/monstereo/activity.py b/monstereo/activity.py
index 0cbf395..0ea288b 100644
--- a/monstereo/activity.py
+++ b/monstereo/activity.py
@@ -2,24 +2,16 @@
# pylint: disable=too-many-statements

import math
-import glob
-import os
import copy
from contextlib import contextmanager

import numpy as np
import torch
-import torch.nn.functional as F
-import torchvision
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, FancyArrow
-from PIL import Image

from .network.process import laplace_sampling
-from .utils import open_annotations
from .visuals.pifpaf_show import KeypointPainter, image_canvas
-from .network import Loco
-from .network.process import factory_for_gt, preprocess_pifpaf


def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,
@@ -27,17 +19,20 @@
    """
    return flag of alert if social distancing is violated
    """
+
+    # A) Check whether people are close together
    xx = centers[idx][0]
    zz = centers[idx][1]
    distances = [math.sqrt((xx - centers[i][0]) ** 2 + (zz - centers[i][1]) ** 2) for i, _ in enumerate(centers)]
    sorted_idxs = np.argsort(distances)
    indices = [idx_t for idx_t in sorted_idxs[1:] if distances[idx_t] <= threshold_dist]

+    # B) Check whether people are looking inwards and whether there are no intrusions
    # Deterministic
    if n_samples < 2:
        for idx_t in indices:
            if check_f_formations(idx, idx_t, centers, angles,
-                                  radii=radii,  # Binary value
+                                  radii=radii,  # Binary value
                                   social_distance=social_distance):
                return True

@@ -72,8 +67,8 @@

def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False):
    """
-    Check F-formations for people close together:
-    1) Empty space of 0.4+ meters (no other people or themselves inside)
+    Check F-formations for people close together (this function does not expect far-away people):
+    1) Empty space of a certain radius (no other people or themselves inside)
    2) People looking inward
    """

@@ -91,119 +86,25 @@
    mu_1 = np.array([centers[idx_t][0] + radius * math.cos(theta1),
                     centers[idx_t][1] - radius * math.sin(theta1)])
    o_c = (mu_0 + mu_1) / 2

-    # Verify they are looking inwards.
+    # 1) Verify they are looking inwards.
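+    # (mu_0 and mu_1 are the o-space centers each person points at, given their position
+    # and head orientation; o_c is their midpoint, i.e. the candidate shared o-space center)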
    # The distance between mus and the center should be less wrt the original position and the center
    d_new = np.linalg.norm(mu_0 - mu_1) / 2 if social_distance else np.linalg.norm(mu_0 - mu_1)
    d_0 = np.linalg.norm(x_0 - o_c)
    d_1 = np.linalg.norm(x_1 - o_c)

-    # Verify no intrusion for third parties
+    # 2) Verify no intrusion for third parties
    if other_centers.size:
        other_distances = np.linalg.norm(other_centers - o_c.reshape(1, -1), axis=1)
    else:
        other_distances = 100 * np.ones((1, 1))  # Condition verified if no other people

    # Binary Classification
+    # if np.min(other_distances) > radius:  # Ablation without orientation
    if d_new <= min(d_0, d_1) and np.min(other_distances) > radius:
        return True
    return False


-def predict(args):
-
-    cnt = 0
-    args.device = torch.device('cpu')
-    if torch.cuda.is_available():
-        args.device = torch.device('cuda')
-
-    # Load data and model
-    monoloco = Loco(model=args.model, net='monoloco_pp',
-                    device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    images = []
-    images += glob.glob(args.glob)  # from cli as a string or linux converts
-
-    # Option 1: Run PifPaf extract poses and run MonoLoco in a single forward pass
-    if args.json_dir is None:
-        from .network import PifPaf, ImageList
-        pifpaf = PifPaf(args)
-        data = ImageList(args.images, scale=args.scale)
-        data_loader = torch.utils.data.DataLoader(
-            data, batch_size=1, shuffle=False,
-            pin_memory=args.pin_memory, num_workers=args.loader_workers)
-
-        for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
-            images = image_tensors.permute(0, 2, 3, 1)
-
-            processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-            fields_batch = pifpaf.fields(processed_images)
-
-            # unbatch
-            for image_path, image, processed_image_cpu, fields in zip(
-                    image_paths, images, processed_images_cpu, fields_batch):
-
-                if args.output_directory is None:
-                    output_path = image_path
-                else:
-                    file_name = os.path.basename(image_path)
-                    output_path = os.path.join(args.output_directory, file_name)
-                im_size = (float(image.size()[1] / args.scale),
-                           float(image.size()[0] / args.scale))
-
-                print('image', idx, image_path, output_path)
-
-                _, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
-
-                kk, dic_gt = factory_for_gt(im_size, name=image_path, path_gt=args.path_gt)
-                image_t = image  # Resized tensor
-
-                # Run Monoloco
-                boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size, enlarge_boxes=False)
-                dic_out = monoloco.forward(keypoints, kk)
-                dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
-
-                # Print
-                show_social(args, image_t, output_path, pifpaf_out, dic_out)
-
-                print('Image {}\n'.format(cnt) + '-' * 120)
-                cnt += 1
-
-    # Option 2: Load json file of poses from PifPaf and run monoloco
-    else:
-        for idx, im_path in enumerate(images):
-
-            # Load image
-            with open(im_path, 'rb') as f:
-                image = Image.open(f).convert('RGB')
-            if args.output_directory is None:
-                output_path = im_path
-            else:
-                file_name = os.path.basename(im_path)
-                output_path = os.path.join(args.output_directory, file_name)
-
-            im_size = (float(image.size[0] / args.scale),
-                       float(image.size[1] / args.scale))  # Width, Height (original)
-            kk, dic_gt = factory_for_gt(im_size, name=im_path, path_gt=args.path_gt)
-            image_t = torchvision.transforms.functional.to_tensor(image).permute(1, 2, 0)
-
-            # Load json
-            basename, ext = os.path.splitext(os.path.basename(im_path))
-
-            extension = ext + '.pifpaf.json'
-            path_json = os.path.join(args.json_dir, basename + extension)
-            annotations = open_annotations(path_json)
-
-            # Run Monoloco
-            boxes, keypoints = preprocess_pifpaf(annotations, im_size, enlarge_boxes=False)
-            dic_out = monoloco.forward(keypoints, kk)
-            dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
-            # Print
-            show_social(args, image_t, output_path, annotations, dic_out)
-
-            print('Image {}\n'.format(cnt) + '-' * 120)
-            cnt += 1
-
-
def show_social(args, image_t, output_path, annotations, dic_out):
    """Output frontal image with poses or combined with bird eye view"""

@@ -214,24 +115,17 @@
    stds = dic_out['stds_ale']
    xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']]

+    # Prepare color for social distancing
+    colors = ['r' if social_interactions(idx, xz_centers, angles, dds,
+                                         stds=stds,
+                                         threshold_prob=args.threshold_prob,
+                                         threshold_dist=args.threshold_dist,
+                                         radii=args.radii)
+              else 'deepskyblue'
+              for idx, _ in enumerate(dic_out['xyz_pred'])]
+
+    # Draw keypoints and orientation
    if 'front' in args.output_types:
-
-        # Resize back the tensor image to its original dimensions
-        if not 0.99 < args.scale < 1.01:
-            size = (round(image_t.shape[0] / args.scale), round(image_t.shape[1] / args.scale))  # height width
-            image_t = image_t.permute(2, 0, 1).unsqueeze(0)  # batch x channels x height x width
-            image_t = F.interpolate(image_t, size=size).squeeze().permute(1, 2, 0)
-
-        # Prepare color for social distancing
-        colors = ['r' if social_interactions(idx, xz_centers, angles, dds,
-                                             stds=stds,
-                                             threshold_prob=args.threshold_prob,
-                                             threshold_dist=args.threshold_dist,
-                                             radii=args.radii)
-                  else 'deepskyblue'
-                  for idx, _ in enumerate(dic_out['xyz_pred'])]
-
-        # Draw keypoints and orientation
        keypoint_sets, scores = get_pifpaf_outputs(annotations)
        uv_centers = dic_out['uv_heads']
        sizes = [abs(dic_out['uv_heads'][idx][1] - uv_s[1]) / 1.5 for idx, uv_s in
@@ -247,20 +141,20 @@
        draw_orientation(ax, uv_centers, sizes, angles, colors, mode='front')

    if 'bird' in args.output_types:
+        z_max = min(args.z_max, 4 + max([el[1] for el in xz_centers]))
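+        # Cap the birds-eye canvas at the farthest detected person plus a 4 m margin
+        # (el[1] is the z-coordinate of each xz center)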
+        with bird_canvas(output_path, z_max) as ax1:
            draw_orientation(ax1, xz_centers, [], angles, colors, mode='bird')
            draw_uncertainty(ax1, xz_centers, stds)
-        with bird_canvas(args, output_path) as ax1:


def get_pifpaf_outputs(annotations):
+    # TODO extract directly from predictions with pifpaf 0.11+
    """Extract keypoints sets and scores from output dictionary"""
    if not annotations:
        return [], []
-    keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape(-1, 17, 3)
+    keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape((-1, 17, 3))
    score_weights = np.ones((keypoints_sets.shape[0], 17))
    score_weights[:, 3] = 3.0
-    # score_weights[:, 5:] = 0.1
-    # score_weights[:, -2:] = 0.0  # ears are not annotated
    score_weights /= np.sum(score_weights[0, :])
    kps_scores = keypoints_sets[:, :, 2]
    ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1]

@@ -269,14 +163,14 @@

@contextmanager
-def bird_canvas(args, output_path):
+def bird_canvas(output_path, z_max):
    fig, ax = plt.subplots(1, 1)
    fig.set_tight_layout(True)
    output_path = output_path + '.bird.png'
-    x_max = args.z_max / 1.5
-    ax.plot([0, x_max], [0, args.z_max], 'k--')
-    ax.plot([0, -x_max], [0, args.z_max], 'k--')
-    ax.set_ylim(0, args.z_max + 1)
+    x_max = z_max / 1.5
+    ax.plot([0, x_max], [0, z_max], 'k--')
+    ax.plot([0, -x_max], [0, z_max], 'k--')
+    ax.set_ylim(0, z_max + 1)
    yield ax
    fig.savefig(output_path)
    plt.close(fig)
diff --git a/monstereo/eval/eval_activity.py b/monstereo/eval/eval_activity.py
index 1322c51..6d7a8fc 100644
--- a/monstereo/eval/eval_activity.py
+++ b/monstereo/eval/eval_activity.py
@@ -23,24 +23,28 @@ class ActivityEvaluator:

    def __init__(self, args):

+        self.dir_ann = args.dir_ann
+        assert self.dir_ann is not None and os.path.exists(self.dir_ann), \
+            "Annotation directory not provided / does not exist"
+        assert os.listdir(self.dir_ann), "Annotation directory is empty"
+
        # COLLECTIVE ACTIVITY DATASET (talking)
        # -------------------------------------------------------------------------------------------------------------
        if args.dataset == 'collective':
-            self.folders_collective = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
+            self.sequences = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
            # folders_collective = ['seq02']
-            self.path_collective = ['data/activity/' + fold for fold in self.folders_collective]
+            self.dir_data = 'data/activity/dataset'
            self.THRESHOLD_PROB = 0.25  # Concordance for samples
            self.THRESHOLD_DIST = 2  # Threshold to check distance of people
            self.RADII = (0.3, 0.5)  # expected radii of the o-space
-            self.PIFPAF_CONF = 0.4
+            self.PIFPAF_CONF = 0.3
            self.SOCIAL_DISTANCE = False
        # -------------------------------------------------------------------------------------------------------------

        # KITTI DATASET (social distancing)
        # ------------------------------------------------------------------------------------------------------------
        else:
-            self.dir_ann_kitti = '/data/lorenzo-data/annotations/kitti/scale_2_july'
-            self.dir_gt_kitti = 'data/kitti/gt_activity'
+            self.dir_data = 'data/kitti/gt_activity'
            self.dir_kk = os.path.join('data', 'kitti', 'calib')
            self.THRESHOLD_PROB = 0.25  # Concordance for samples
            self.THRESHOLD_DIST = 2  # Threshold to check distance of people
@@ -62,25 +66,25 @@

    def eval_collective(self):
        """Parse Collective Activity Dataset and predict if people are talking or not"""

-        for fold in self.path_collective:
-            images = glob.glob(fold + '/*.jpg')
-            initial_path = os.path.join(fold, 'frame0001.jpg')
-            with open(initial_path, 'rb') as f:
+        for seq in self.sequences:
+            images = glob.glob(os.path.join(self.dir_data, 'images', seq + '*.jpg'))
+            initial_im = os.path.join(self.dir_data, 'images', seq + '_frame0001.jpg')
+            with open(initial_im, 'rb') as f:
                image = Image.open(f).convert('RGB')
                im_size = image.size
+            assert len(im_size) > 1, "image with frame0001 not available"

            for idx, im_path in enumerate(images):
                # Collect PifPaf files and calibration
                basename = os.path.basename(im_path)
-                extension = '.pifpaf.json'
-                path_pif = os.path.join(fold, basename + extension)
+                extension = '.predictions.json'
+                path_pif = os.path.join(self.dir_ann, basename + extension)
                annotations = open_annotations(path_pif)
                kk, _ = factory_for_gt(im_size, verbose=False)

                # Collect corresponding gt files (ys_gt: 1 or 0)
-                boxes_gt, ys_gt = parse_gt_collective(fold, path_pif)
-
+                boxes_gt, ys_gt = parse_gt_collective(self.dir_data, seq, path_pif)

                # Run Monoloco
                dic_out, boxes = self.run_monoloco(annotations, kk, im_size=im_size)

@@ -88,17 +92,19 @@
                matches = get_iou_matches(boxes, boxes_gt, iou_min=0.3)

                # Estimate activity
-                categories = [os.path.basename(fold)] * len(boxes_gt)
+                categories = [seq] * len(boxes_gt)  # for compatibility with KITTI evaluation
                self.estimate_activity(dic_out, matches, ys_gt, categories=categories)
            # Print Results
-        cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.folders_collective)
+            acc = accuracy_score(self.all_gt[seq], self.all_pred[seq])
+            print(f"Accuracy of category {seq}: {100*acc:.2f}%")
+        cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.sequences)

    def eval_kitti(self):
        """Parse KITTI Dataset and predict if people are talking or not"""
        from ..utils import factory_file

-        files = glob.glob(self.dir_gt_kitti + '/*.txt')
+        files = glob.glob(self.dir_data + '/*.txt')
        # files = [self.dir_gt_kitti + '/001782.txt']
        assert files, "Empty directory"

@@ -107,10 +113,10 @@
            # Collect PifPaf files and calibration
            basename, _ = os.path.splitext(os.path.basename(file))
            path_calib = os.path.join(self.dir_kk, basename + '.txt')
-            annotations, kk, tt = factory_file(path_calib, self.dir_ann_kitti, basename)
+            annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)

            # Collect corresponding gt files (ys_gt: 1 or 0)
-            path_gt = os.path.join(self.dir_gt_kitti, basename + '.txt')
+            path_gt = os.path.join(self.dir_data, basename + '.txt')
            boxes_gt, ys_gt, difficulties = parse_gt_kitti(path_gt)

            # Run Monoloco
@@ -131,17 +137,16 @@
        angles = dic_out['angles']
        dds = dic_out['dds_pred']
        stds = dic_out['stds_ale']
-        confs = dic_out['confs']
        xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']]

-        # Count gt statistics
+        # Count gt statistics (one element for each ground truth)
        for key in categories:
            self.cnt['gt'][key] += 1
            self.cnt['gt']['all'] += 1

        for i_m, (idx, idx_gt) in enumerate(matches):
-            # Select keys to update resultd for Collective or KITTI
+            # Select keys to update results for Collective or KITTI
            keys = ('all', categories[idx_gt])

            # Run social interactions rule
@@ -166,10 +171,12 @@
        return dic_out, boxes


-def parse_gt_collective(fold, path_pif):
+def parse_gt_collective(dir_data, seq, path_pif):
    """Parse both gt and binary label (1/0) for talking or not"""

-    with open(os.path.join(fold, "annotations.txt"), "r") as ff:
+    path = os.path.join(dir_data, 'annotations', seq + '_annotations.txt')
+
+    with open(path, "r") as ff:
        reader = csv.reader(ff, delimiter='\t')
        dic_frames = defaultdict(lambda: defaultdict(list))
        for idx, line in enumerate(reader):
@@ -212,17 +219,21 @@
    # Split by folders for collective activity
    for key in categories:
        acc = accuracy_score(all_gt[key], all_pred[key])
-        print("Accuracy of category {}: {:.2f}% , Recall: {:.2f}%, #: {}, Predicted positive: {:.2f}%"
+        print("Accuracy of category {}: {:.2f}%, Recall: {:.2f}%, #: {}, Pred/Real positive: {:.1f}% / {:.1f}%"
              .format(key, acc * 100,
                      cnt['pred'][key] / cnt['gt'][key] * 100,
                      cnt['pred'][key],
-                      sum(all_gt[key]) / len(all_gt[key]) * 100))
+                      sum(all_pred[key]) / len(all_pred[key]) * 100,
+                      sum(all_gt[key]) / len(all_gt[key]) * 100
+                      )
+              )

    # Final Accuracy
    acc = accuracy_score(all_gt['all'], all_pred['all'])
+    recall = cnt['pred']['all'] / cnt['gt']['all'] * 100  # only predictions that match a ground-truth are included
    print('-' * 80)
-    print("Final Accuracy: {:.2f}%".format(acc * 100))
+    print(f"Final Accuracy: {acc * 100:.2f}%, Final Recall: {recall:.2f}%")
    print('-' * 80)

@@ -244,8 +255,8 @@ def convert_category(cat):

def extract_frame_number(path):
    """extract frame number from path"""
    name = os.path.basename(path)
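+    # Image names follow the seqXX_frameYYYY.jpg convention of the Collective Activity
+    # preprocessing, so the frame digits start at character index 11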
-    if name[5] == '0':
-        frame = name[6:9]
+    if name[11] == '0':
+        frame = name[12:15]
    else:
-        frame = name[5:9]
+        frame = name[11:15]
    return frame
diff --git a/monstereo/eval/eval_kitti.py b/monstereo/eval/eval_kitti.py
index 4542a30..d4d5866 100644
--- a/monstereo/eval/eval_kitti.py
+++ b/monstereo/eval/eval_kitti.py
@@ -25,41 +25,58 @@
                '27', '29', '31', '49')
    ALP_THRESHOLDS = ('<0.5m', '<1m', '<2m')
    OUR_METHODS = ['geometric', 'monoloco', 'monoloco_pp', 'pose', 'reid', 'monstereo']
-    METHODS_MONO = ['m3d', 'monopsr']
+    METHODS_MONO = ['m3d', 'monopsr', 'smoke', 'monodis']
    METHODS_STEREO = ['3dop', 'psf', 'pseudo-lidar', 'e2e', 'oc-stereo']
    BASELINES = ['task_error', 'pixel_error']
    HEADERS = ('method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all')
    CATEGORIES = ('pedestrian',)
+    methods = OUR_METHODS + METHODS_MONO + METHODS_STEREO

-    def __init__(self, thresh_iou_monoloco=0.3, thresh_iou_base=0.3, thresh_conf_monoloco=0.2, thresh_conf_base=0.5,
-                 verbose=False):
+    # Set directories
+    main_dir = os.path.join('data', 'kitti')
+    dir_gt = os.path.join(main_dir, 'gt')
+    path_train = os.path.join('splits', 'kitti_train.txt')
+    path_val = os.path.join('splits', 'kitti_val.txt')
+    dir_logs = os.path.join('data', 'logs')
+    assert os.path.exists(dir_logs), "No directory to save final statistics"
+    dir_fig = os.path.join('data', 'figures')
+    assert os.path.exists(dir_fig), "No directory to save figures"

-        self.main_dir = os.path.join('data', 'kitti')
-        self.dir_gt = os.path.join(self.main_dir, 'gt')
-        self.methods = self.OUR_METHODS + self.METHODS_MONO + self.METHODS_STEREO
-        path_train = os.path.join('splits', 'kitti_train.txt')
-        path_val = os.path.join('splits', 'kitti_val.txt')
-        dir_logs = os.path.join('data', 'logs')
-        assert dir_logs, "No directory to save final statistics"
+    # Set thresholds to obtain comparable recalls
+    thresh_iou_monoloco = 0.3
+    thresh_iou_base = 0.3
+    thresh_conf_monoloco = 0.2
+    thresh_conf_base = 0.5
+
+    def __init__(self, args):
+
+        self.verbose = args.verbose
+        self.net = args.net
+        self.save = args.save
+        self.show = args.show

        now = datetime.datetime.now()
        now_time = now.strftime("%Y%m%d-%H%M")[2:]
-        self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json')
-        self.verbose = verbose
+        self.path_results = os.path.join(self.dir_logs, 'eval-' + now_time + '.json')

-        self.dic_thresh_iou = {method: (thresh_iou_monoloco if method in self.OUR_METHODS
-                                        else thresh_iou_base)
+        # Set thresholds for comparable recalls
+        self.dic_thresh_iou = {method: (self.thresh_iou_monoloco if method in self.OUR_METHODS
+                                        else self.thresh_iou_base)
                               for method in self.methods}
-        self.dic_thresh_conf = {method: (thresh_conf_monoloco if method in self.OUR_METHODS
-                                         else thresh_conf_base)
+        self.dic_thresh_conf = {method: (self.thresh_conf_monoloco if method in self.OUR_METHODS
+                                         else self.thresh_conf_base)
                                for method in self.methods}
-        self.dic_thresh_conf['monopsr'] += 0.3
-        self.dic_thresh_conf['e2e-pl'] = -100  # They don't have enough detections
+
+        # Set thresholds to obtain comparable recall
+        self.dic_thresh_conf['monopsr'] += 0.4
+        self.dic_thresh_conf['e2e-pl'] = -100
        self.dic_thresh_conf['oc-stereo'] = -100
+        self.dic_thresh_conf['smoke'] = -100
+        self.dic_thresh_conf['monodis'] = -100

        # Extract validation images for evaluation
        names_gt = tuple(os.listdir(self.dir_gt))
-        _, self.set_val = split_training(names_gt, path_train, path_val)
+        _, self.set_val = split_training(names_gt, self.path_train, self.path_val)
        # self.set_val = ('002282.txt', )

@@ -68,10 +85,13 @@
            = None
        self.cnt = 0

+        # Filter methods with empty or non-existent directories
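+        # (filter_directories mutates self.methods in place, dropping any method whose
+        #  results directory is missing or empty; see the helper at the end of this file)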
+        filter_directories(self.main_dir, self.methods)
+
    def run(self):
        """Evaluate Monoloco performances on ALP and ALE metrics"""

-        for self.category in self.CATEGORIES:
+        for self.category in self.CATEGORIES:  # Initialize variables
            self.errors = defaultdict(lambda: defaultdict(list))
            self.dic_stds = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

@@ -90,7 +110,7 @@
            methods_out = defaultdict(tuple)  # Save all methods for comparison

            # Count ground_truth:
-            boxes_gt, ys, truncs_gt, occs_gt = out_gt
+            boxes_gt, ys, truncs_gt, occs_gt = out_gt  # pylint: disable=unbalanced-tuple-unpacking
            for idx, box in enumerate(boxes_gt):
                mode = get_difficulty(box, truncs_gt[idx], occs_gt[idx])
                self.cnt_gt[mode] += 1

@@ -100,7 +120,6 @@
            for method in self.methods:
                # Extract annotations
                dir_method = os.path.join(self.main_dir, method)
-                assert os.path.exists(dir_method), "directory of the method %s does not exists" % method
                path_method = os.path.join(dir_method, name)
                methods_out[method] = self._parse_txts(path_method, method=method)

@@ -124,12 +143,14 @@
            print('\n' + self.category.upper() + ':')
            self.show_statistics()

-    def printer(self, show, save):
-        if save or show:
-            show_results(self.dic_stats, self.CLUSTERS, show=show, save=save)
-            show_spread(self.dic_stats, self.CLUSTERS, show=show, save=save)
-            show_box_plot(self.errors, self.CLUSTERS, show=show, save=save)
-            show_task_error(show=show, save=save)
+    def printer(self):
+        if self.save or self.show:
+            show_results(self.dic_stats, self.CLUSTERS, self.net, self.dir_fig, show=self.show, save=self.save)
+            show_spread(self.dic_stats, self.CLUSTERS, self.net, self.dir_fig, show=self.show, save=self.save)
+            if self.net == 'monstereo':
+                show_box_plot(self.errors, self.CLUSTERS, self.dir_fig, show=self.show, save=self.save)
+            else:
+                show_task_error(self.dir_fig, show=self.show, save=self.save)

    def _parse_txts(self, path, method):

@@ -352,7 +373,7 @@
        self.name = name
        # Iterate over each line of the gt file and save box location and distances
        out_gt = parse_ground_truth(path_gt, 'pedestrian')
-        boxes_gt, ys, truncs_gt, occs_gt = out_gt
+        boxes_gt, ys, truncs_gt, occs_gt = out_gt  # pylint: disable=unbalanced-tuple-unpacking
        for label in ys:
            heights.append(label[4])

        import numpy as np
@@ -430,3 +451,14 @@ def extract_indices(idx_to_check, *args):

def average(my_list):
    """calculate mean of a list"""
    return sum(my_list) / len(my_list)
+
+
+def filter_directories(main_dir, methods):
+    for method in list(methods):  # iterate over a copy, as items are removed below
+        dir_method = os.path.join(main_dir, method)
+        if not os.path.exists(dir_method):
+            methods.remove(method)
+            print(f"\nMethod {method}. No directory found. Skipping it..")
+        elif not os.listdir(dir_method):
+            methods.remove(method)
+            print(f"\nMethod {method}. Directory is empty. Skipping it..")
Skipping it..") diff --git a/monstereo/eval/eval_variance.py b/monstereo/eval/eval_variance.py index d72fae9..2d44a15 100644 --- a/monstereo/eval/eval_variance.py +++ b/monstereo/eval/eval_variance.py @@ -1,5 +1,4 @@ - -# pylint: disable=too-many-statements,cyclic-import, too-many-branches +# pylint: disable=too-many-statements,too-many-branches,cyclic-import """Joints Analysis: Supplementary material of MonStereo""" diff --git a/monstereo/eval/generate_kitti.py b/monstereo/eval/generate_kitti.py index 0ee7283..a2469a8 100644 --- a/monstereo/eval/generate_kitti.py +++ b/monstereo/eval/generate_kitti.py @@ -1,5 +1,5 @@ -#pylint: disable=too-many-branches +# pylint: disable=too-many-branches """ Run MonoLoco/MonStereo and converts annotations into KITTI format @@ -22,39 +22,35 @@ from .reid_baseline import get_reid_features, ReID class GenerateKitti: - METHODS = ['monstereo', 'monoloco_pp', 'monoloco', 'geometric'] + dir_gt = os.path.join('data', 'kitti', 'gt') + dir_gt_new = os.path.join('data', 'kitti', 'gt_new') + dir_kk = os.path.join('data', 'kitti', 'calib') + dir_byc = '/data/lorenzo-data/kitti/object_detection/left' + monoloco_checkpoint = 'data/models/monoloco-190717-0952.pkl' + baselines = {'mono': [], 'stereo': []} - def __init__(self, model, dir_ann, p_dropout=0.2, n_dropout=0, hidden_size=1024): + def __init__(self, args): + + # Load Network + self.net = args.net + assert args.net in ('monstereo', 'monoloco_pp'), "net not recognized" - # Load monoloco use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") - - if 'monstereo' in self.METHODS: - self.monstereo = Loco(model=model, net='monstereo', device=device, n_dropout=n_dropout, p_dropout=p_dropout, - linear_size=hidden_size) - # model_mono_pp = 'data/models/monoloco-191122-1122.pkl' # KITTI_p - # model_mono_pp = 'data/models/monoloco-191018-1459.pkl' # nuScenes_p - model_mono_pp = 'data/models/stereoloco-200604-0949.pkl' # KITTI_pp - # model_mono_pp = 'data/models/stereoloco-200608-1550.pkl' # nuScenes_pp - - if 'monoloco_pp' in self.METHODS: - self.monoloco_pp = Loco(model=model_mono_pp, net='monoloco_pp', device=device, n_dropout=n_dropout, - p_dropout=p_dropout) - - if 'monoloco' in self.METHODS: - model_mono = 'data/models/monoloco-190717-0952.pkl' # KITTI - # model_mono = 'data/models/monoloco-190719-0923.pkl' # NuScenes - self.monoloco = Loco(model=model_mono, net='monoloco', device=device, n_dropout=n_dropout, - p_dropout=p_dropout, linear_size=256) - self.dir_ann = dir_ann + self.model = Loco( + model=args.model, + net=args.net, + device=device, + n_dropout=args.n_dropout, + p_dropout=args.dropout, + linear_size=args.hidden_size + ) # Extract list of pifpaf files in validation images - self.dir_gt = os.path.join('data', 'kitti', 'gt') - self.dir_gt_new = os.path.join('data', 'kitti', 'gt_new') - self.set_basename = factory_basename(dir_ann, self.dir_gt) - self.dir_kk = os.path.join('data', 'kitti', 'calib') - self.dir_byc = '/data/lorenzo-data/kitti/object_detection/left' + self.dir_ann = args.dir_ann + self.generate_official = args.generate_official + assert os.listdir(self.dir_ann), "Annotation directory is empty" + self.set_basename = factory_basename(args.dir_ann, self.dir_gt) # For quick testing # ------------------------------------------------------------------------------------------------------------ @@ -62,33 +58,48 @@ class GenerateKitti: # self.set_basename = ('002282',) # 
        # ------------------------------------------------------------------------------------------------------------

-        # Calculate stereo baselines
-        # self.baselines = ['pose', 'reid']
-        self.baselines = []
-        self.cnt_disparity = defaultdict(int)
-        self.cnt_no_stereo = 0
-        self.dir_images = os.path.join('data', 'kitti', 'images')
-        self.dir_images_r = os.path.join('data', 'kitti', 'images_r')
-        # ReID Baseline
-        if 'reid' in self.baselines:
-            weights_path = 'data/models/reid_model_market.pkl'
-            self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128)
+        # Add monocular and stereo baselines (they require monoloco as backbone)
+        if args.baselines:
+
+            # Load MonoLoco
+            self.baselines['mono'] = ['monoloco', 'geometric']
+            self.monoloco = Loco(
+                model=self.monoloco_checkpoint,
+                net='monoloco',
+                device=device,
+                n_dropout=args.n_dropout,
+                p_dropout=args.dropout,
+                linear_size=256
+            )
+
+            # Stereo baselines
+            if args.net == 'monstereo':
+                self.baselines['stereo'] = ['pose', 'reid']
+                self.cnt_disparity = defaultdict(int)
+                self.cnt_no_stereo = 0
+                self.dir_images = os.path.join('data', 'kitti', 'images')
+                self.dir_images_r = os.path.join('data', 'kitti', 'images_r')
+
+                # ReID Baseline
+                weights_path = 'data/models/reid_model_market.pkl'
+                self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128)

    def run(self):
        """Run Monoloco and save txt files for KITTI evaluation"""
        cnt_ann = cnt_file = cnt_no_file = 0

-        dir_out = {key: os.path.join('data', 'kitti', key) for key in self.METHODS}
-        print("\n")
-        for key in self.METHODS:
-            make_new_directory(dir_out[key])
-        for key in self.baselines:
-            dir_out[key] = os.path.join('data', 'kitti', key)
-            make_new_directory(dir_out[key])
-            print("Created empty output directory for {}".format(key))
+        # Prepare empty folder
+        di = os.path.join('data', 'kitti', self.net)
+        make_new_directory(di)
+        dir_out = {self.net: di}

+        for mode, names in self.baselines.items():
+            for name in names:
+                di = os.path.join('data', 'kitti', name)
+                make_new_directory(di)
+                dir_out[name] = di
+
+        # Run the model
        for basename in self.set_basename:
            path_calib = os.path.join(self.dir_kk, basename + '.txt')
            annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
@@ -98,58 +109,60 @@
            annotations_r, _, _ = factory_file(path_calib, self.dir_ann, basename, mode='right')
            _, keypoints_r = preprocess_pifpaf(annotations_r, im_size=(1242, 374))

+            if self.net == 'monstereo':
+                dic_out = self.model.forward(keypoints, kk, keypoints_r=keypoints_r)
+            elif self.net == 'monoloco_pp':
+                dic_out = self.model.forward(keypoints, kk)
+
+            all_outputs = {self.net: [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
+                                      dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]}
+            zzs = [float(el[2]) for el in dic_out['xyzd']]
+
+            # Save txt files
+            params = [kk, tt]
+            path_txt = os.path.join(dir_out[self.net], basename + '.txt')
+            save_txts(path_txt, boxes, all_outputs[self.net], params, mode=self.net, cat=cat)
            cnt_ann += len(boxes)
            cnt_file += 1

-            all_inputs, all_outputs = {}, {}
-
-            # STEREOLOCO
-            dic_out = self.monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
-            all_outputs['monstereo'] = [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
-                                        dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]
-
-            # MONOLOCO++
-            if 'monoloco_pp' in self.METHODS:
-                dic_out = self.monoloco_pp.forward(keypoints, kk)
-                all_outputs['monoloco_pp'] = [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
-                                              dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]
-                zzs = [float(el[2]) for el in dic_out['xyzd']]
-
-            # MONOLOCO
-            if 'monoloco' in self.METHODS:
+            # MONO (+ STEREO BASELINES)
+            if self.baselines['mono']:
+                # MONOLOCO
                dic_out = self.monoloco.forward(keypoints, kk)
                zzs_geom, xy_centers = geometric_coordinates(keypoints, kk, average_y=0.48)
                all_outputs['monoloco'] = [dic_out['d'], dic_out['bi'], dic_out['epi']] + [zzs_geom, xy_centers]
                all_outputs['geometric'] = all_outputs['monoloco']

-            params = [kk, tt]
+                # monocular baselines
+                for key in self.baselines['mono']:
+                    path_txt = {key: os.path.join(dir_out[key], basename + '.txt')}
+                    save_txts(path_txt[key], boxes, all_outputs[key], params, mode=key, cat=cat)

-            for key in self.METHODS:
-                path_txt = {key: os.path.join(dir_out[key], basename + '.txt')}
-                save_txts(path_txt[key], boxes, all_outputs[key], params, mode=key, cat=cat)
+                # stereo baselines
+                if self.baselines['stereo']:
+                    all_inputs = {}
+                    dic_xyz = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib)
+                    for key in dic_xyz:
+                        all_outputs[key] = all_outputs['monoloco'].copy()
+                        all_outputs[key][0] = dic_xyz[key]
+                        all_inputs[key] = boxes

-            # STEREO BASELINES
-            if self.baselines:
-                dic_xyz = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib)
-
-                for key in dic_xyz:
-                    all_outputs[key] = all_outputs['monoloco'].copy()
-                    all_outputs[key][0] = dic_xyz[key]
-                    all_inputs[key] = boxes
-
-                    path_txt[key] = os.path.join(dir_out[key], basename + '.txt')
-                    save_txts(path_txt[key], all_inputs[key], all_outputs[key], params, mode='baseline', cat=cat)
+                        path_txt[key] = os.path.join(dir_out[key], basename + '.txt')
+                        save_txts(path_txt[key], all_inputs[key], all_outputs[key], params,
+                                  mode='baseline',
+                                  cat=cat)

        print("\nSaved in {} txt {} annotations. Not found {} images".format(cnt_file, cnt_ann, cnt_no_file))
-        if 'monstereo' in self.METHODS:
+        if self.net == 'monstereo':
            print("STEREO:")
-            for key in self.baselines:
+            for key in self.baselines['stereo']:
                print("Annotations corrected using {} baseline: {:.1f}%".format(
                    key, self.cnt_disparity[key] / cnt_ann * 100))
-            print("Maximum possible stereo associations: {:.1f}%".format(self.cnt_disparity['max'] / cnt_ann * 100))
            print("Not found {}/{} stereo files".format(self.cnt_no_stereo, cnt_file))

-        create_empty_files(dir_out)  # Create empty files for official evaluation
+        if self.generate_official:
+            create_empty_files(dir_out, self.net)  # Create empty files for official evaluation

    def _run_stereo_baselines(self, basename, boxes, keypoints, zzs, path_calib):

@@ -165,14 +178,14 @@
            path_image = os.path.join(self.dir_images, basename + '.png')
            path_image_r = os.path.join(self.dir_images_r, basename + '.png')
            reid_features = get_reid_features(self.reid_net, boxes, boxes_r, path_image, path_image_r)
-            dic_zzs, cnt = baselines_association(self.baselines, zzs, keypoints, keypoints_r, reid_features)
+            dic_zzs, cnt = baselines_association(self.baselines['stereo'], zzs, keypoints, keypoints_r, reid_features)

            for key in cnt:
                self.cnt_disparity[key] += cnt[key]

        else:
            self.cnt_no_stereo += 1
-            dic_zzs = {key: zzs for key in self.baselines}
+            dic_zzs = {key: zzs for key in self.baselines['stereo']}

        # Combine the stereo zz with x, y from 2D detection (no MonoLoco involved)
        dic_xyz = defaultdict(list)

@@ -227,8 +240,9 @@
        conf_scale = 0.03
    elif mode == 'monoloco_pp':
        conf_scale = 0.033
+        # conf_scale = 0.035  # nuScenes for having same recall
    else:
-        conf_scale = 0.055
+        conf_scale = 0.05
    conf = conf_scale * (uv_box[-1]) / (bi / math.sqrt(xx ** 2 + yy ** 2 + zz ** 2))

    output_list = [alpha] + uv_box[:-1] + hwl + cam_0 + [ry, conf, bi, epi]
@@ -244,11 +258,10 @@
        ff.write("\n")


-def create_empty_files(dir_out):
+def create_empty_files(dir_out, net):
    """Create empty txt files to run official kitti metrics on MonStereo and all other methods"""

-    methods = ['pseudo-lidar', 'monopsr', '3dop', 'm3d', 'oc-stereo', 'e2e']
-    methods = []
+    methods = ['pseudo-lidar', 'monopsr', '3dop', 'm3d', 'oc-stereo', 'e2e', 'monodis', 'smoke']
    dirs = [os.path.join('data', 'kitti', method) for method in methods]
    dirs_orig = [os.path.join('data', 'kitti', method + '-orig') for method in methods]

@@ -263,8 +276,7 @@
            # If the file exits, rewrite in new folder, otherwise create empty file
            read_and_rewrite(path_orig, path)

-    for method in ('monoloco_pp', 'monstereo'):
-        for i in range(7481):
-            name = "0" * (6 - len(str(i))) + str(i) + '.txt'
-            ff = open(os.path.join(dir_out[method], name), "a+")
-            ff.close()
+    for i in range(7481):
+        name = "0" * (6 - len(str(i))) + str(i) + '.txt'
+        ff = open(os.path.join(dir_out[net], name), "a+")
+        ff.close()
diff --git a/monstereo/eval/reid_baseline.py b/monstereo/eval/reid_baseline.py
index 8dd4aee..d54faa8 100644
--- a/monstereo/eval/reid_baseline.py
+++ b/monstereo/eval/reid_baseline.py
@@ -29,7 +29,7 @@ def get_reid_features(reid_net, boxes, boxes_r, path_image, path_image_r):

class ReID(object):
    def __init__(self, weights_path, device, num_classes=751, height=256, width=128):
-        super(ReID, self).__init__()
+        super().__init__()
        torch.manual_seed(1)

        self.device = device
@@ -90,7 +90,7 @@ class ReID(object):
diff --git a/monstereo/eval/reid_baseline.py b/monstereo/eval/reid_baseline.py
index 8dd4aee..d54faa8 100644
--- a/monstereo/eval/reid_baseline.py
+++ b/monstereo/eval/reid_baseline.py
@@ -29,7 +29,7 @@ def get_reid_features(reid_net, boxes, boxes_r, path_image, path_image_r):
 class ReID(object):
     def __init__(self, weights_path, device, num_classes=751, height=256, width=128):
-        super(ReID, self).__init__()
+        super().__init__()
         torch.manual_seed(1)

         self.device = device
@@ -90,7 +90,7 @@ class ReID(object):
 class ResNet50(nn.Module):
     def __init__(self, num_classes, loss):
-        super(ResNet50, self).__init__()
+        super().__init__()
         self.loss = loss
         resnet50 = torchvision.models.resnet50(pretrained=True)
         self.base = nn.Sequential(*list(resnet50.children())[:-2])
diff --git a/monstereo/network/__init__.py b/monstereo/network/__init__.py
index 216e217..0479941 100644
--- a/monstereo/network/__init__.py
+++ b/monstereo/network/__init__.py
@@ -1,4 +1,3 @@
 from .net import Loco
-from .pifpaf import PifPaf, ImageList
 from .process import unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux
diff --git a/monstereo/network/architectures.py b/monstereo/network/architectures.py
index b8ec94a..9e829a0 100644
--- a/monstereo/network/architectures.py
+++ b/monstereo/network/architectures.py
@@ -6,7 +6,7 @@ import torch.nn as nn
 class MonStereoModel(nn.Module):

     def __init__(self, input_size, output_size=2, linear_size=512, p_dropout=0.2, num_stage=3, device='cuda'):
-        super(MonStereoModel, self).__init__()
+        super().__init__()

         self.num_stage = num_stage
         self.stereo_size = input_size
@@ -73,7 +73,7 @@ class MonStereoModel(nn.Module):
 class MyLinearSimple(nn.Module):
     def __init__(self, linear_size, p_dropout=0.5):
-        super(MyLinearSimple, self).__init__()
+        super().__init__()
         self.l_size = linear_size

         self.relu = nn.ReLU(inplace=True)
@@ -109,7 +109,7 @@ class MonolocoModel(nn.Module):
     """

     def __init__(self, input_size, output_size=2, linear_size=256, p_dropout=0.2, num_stage=3):
-        super(MonolocoModel, self).__init__()
+        super().__init__()

         self.input_size = input_size
         self.output_size = output_size
@@ -147,7 +147,7 @@ class MonolocoModel(nn.Module):
 class MyLinear(nn.Module):
     def __init__(self, linear_size, p_dropout=0.5):
-        super(MyLinear, self).__init__()
+        super().__init__()

         self.l_size = linear_size
         self.relu = nn.ReLU(inplace=True)
diff --git a/monstereo/network/net.py b/monstereo/network/net.py
index b08b619..67b7ac9 100644
--- a/monstereo/network/net.py
+++ b/monstereo/network/net.py
@@ -56,7 +56,7 @@ class Loco:
                                            output_size=output_size)
             else:
                 self.model = MonStereoModel(p_dropout=p_dropout, input_size=input_size, output_size=output_size,
-                                           linear_size=linear_size, device=self.device)
+                                            linear_size=linear_size, device=self.device)

             self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
         else:
@@ -163,7 +163,7 @@ class Loco:
             print("found {} matches with ground-truth".format(len(matches)))

             # Keep track of instances non-matched
-            idxs_matches = (el[0] for el in matches)
+            idxs_matches = [el[0] for el in matches]
             not_matches = [idx for idx, _ in enumerate(boxes) if idx not in idxs_matches]

         else:
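The `idxs_matches` change in `net.py` above is a real bug fix, not a style tweak: a generator expression is single-use, so the repeated `in` membership tests inside the following list comprehension silently see an exhausted iterator. A standalone illustration:

```
matches = [(2, 0), (0, 1)]                    # (box index, ground-truth index)

gen = (el[0] for el in matches)               # generator: consumed by membership tests
lst = [el[0] for el in matches]               # list: can be scanned repeatedly

print([i for i in range(3) if i not in gen])  # [1, 2] -> index 2 wrongly kept
print([i for i in range(3) if i not in lst])  # [1]    -> correct non-matched indices
```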
diff --git a/monstereo/network/pifpaf.py b/monstereo/network/pifpaf.py
deleted file mode 100644
index 6209c04..0000000
--- a/monstereo/network/pifpaf.py
+++ /dev/null
@@ -1,102 +0,0 @@
-
-import glob
-
-import numpy as np
-import torchvision
-import torch
-from PIL import Image, ImageFile
-from openpifpaf.network import nets
-from openpifpaf import decoder
-
-from .process import image_transform
-
-
-class ImageList(torch.utils.data.Dataset):
-    """It defines transformations to apply to images and outputs of the dataloader"""
-    def __init__(self, image_paths, scale):
-        self.image_paths = image_paths
-        self.image_paths.sort()
-        self.scale = scale
-
-    def __getitem__(self, index):
-        image_path = self.image_paths[index]
-        ImageFile.LOAD_TRUNCATED_IMAGES = True
-        with open(image_path, 'rb') as f:
-            image = Image.open(f).convert('RGB')
-
-        if self.scale > 1.01 or self.scale < 0.99:
-            image = torchvision.transforms.functional.resize(image,
-                                                             (round(self.scale * image.size[1]),
-                                                              round(self.scale * image.size[0])),
-                                                             interpolation=Image.BICUBIC)
-        # PIL images are not iterables
-        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
-        image = image_transform(image)
-
-        return image_path, original_image, image
-
-    def __len__(self):
-        return len(self.image_paths)
-
-
-def factory_from_args(args):
-
-    # Merge the model_pifpaf argument
-    if not args.checkpoint:
-        args.checkpoint = 'resnet152'  # Default model Resnet 152
-    # glob
-    if args.glob:
-        args.images += glob.glob(args.glob)
-    if not args.images:
-        raise Exception("no image files given")
-
-    # add args.device
-    args.device = torch.device('cpu')
-    args.pin_memory = False
-    if torch.cuda.is_available():
-        args.device = torch.device('cuda')
-        args.pin_memory = True
-
-    # Add num_workers
-    args.loader_workers = 8
-
-    # Add visualization defaults
-    args.figure_width = 10
-    args.dpi_factor = 1.0
-
-    return args
-
-
-class PifPaf:
-    def __init__(self, args):
-        """Instanciate the mdodel"""
-        factory_from_args(args)
-        model_pifpaf, _ = nets.factory_from_args(args)
-        model_pifpaf = model_pifpaf.to(args.device)
-        self.processor = decoder.factory_from_args(args, model_pifpaf)
-        self.keypoints_whole = []
-
-        # Scale the keypoints to the original image size for printing (if not webcam)
-        self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
-
-    def fields(self, processed_images):
-        """Encoder for pif and paf fields"""
-        fields_batch = self.processor.fields(processed_images)
-        return fields_batch
-
-    def forward(self, image, processed_image_cpu, fields):
-        """Decoder, from pif and paf fields to keypoints"""
-        self.processor.set_cpu_image(image, processed_image_cpu)
-        keypoint_sets, scores = self.processor.keypoint_sets(fields)
-
-        if keypoint_sets.size > 0:
-            self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1)
-                                        .reshape(keypoint_sets.shape[0], -1).tolist())
-
-        pifpaf_out = [
-            {'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
-             'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0],
-                      np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]}
-            for kps in keypoint_sets
-        ]
-        return keypoint_sets, scores, pifpaf_out
diff --git a/monstereo/network/process.py b/monstereo/network/process.py
index d1a4760..9b12378 100644
--- a/monstereo/network/process.py
+++ b/monstereo/network/process.py
@@ -82,7 +82,7 @@ def factory_for_gt(im_size, name=None, path_gt=None, verbose=True):
         dic_gt = None
     x_factor = im_size[0] / 1600
     y_factor = im_size[1] / 900
-    pixel_factor = (x_factor + y_factor) / 2  # 1.7 for MOT
+    pixel_factor = (x_factor + y_factor) / 1.75  # 1.75 for MOT
     # pixel_factor = 1
     if im_size[0] / im_size[1] > 2.5:
         kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
@@ -274,7 +274,6 @@ def extract_outputs(outputs, tasks=()):
         if outputs.shape[1] == 10:
             dic_out['aux'] = torch.sigmoid(dic_out['aux'])
-
     return dic_out
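In `factory_for_gt`, the heuristic pixel factor that rescales the default camera intrinsics now uses a divisor of 1.75 instead of 2, slightly enlarging the assumed focal scaling for non-reference resolutions. For a hypothetical 1920x1080 input against the 1600x900 reference:

```
im_size = (1920, 1080)                        # hypothetical input resolution
x_factor = im_size[0] / 1600                  # 1.2
y_factor = im_size[1] / 900                   # 1.2
pixel_factor = (x_factor + y_factor) / 1.75   # ~1.37 (was 1.2 with the old divisor of 2)
print(round(pixel_factor, 2))
```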
diff --git a/monstereo/predict.py b/monstereo/predict.py
index d869386..3c5f342 100644
--- a/monstereo/predict.py
+++ b/monstereo/predict.py
@@ -2,144 +2,171 @@
 # pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable

 import os
+import glob
 import json
+import logging
 from collections import defaultdict

 import torch
-from PIL import Image
+import PIL
+import openpifpaf
+import openpifpaf.datasets as datasets
+from openpifpaf.predict import processor_factory, preprocess_factory
+from openpifpaf import decoder, network, visualizer, show

 from .visuals.printer import Printer
-from .visuals.pifpaf_show import KeypointPainter, image_canvas
-from .network import PifPaf, ImageList, Loco
+from .network import Loco
 from .network.process import factory_for_gt, preprocess_pifpaf
+from .activity import show_social
+
+LOG = logging.getLogger(__name__)
+
+
+def factory_from_args(args):
+
+    # Data
+    if args.glob:
+        args.images += glob.glob(args.glob)
+    if not args.images:
+        raise Exception("no image files given")
+
+    # Model
+    if not args.checkpoint:
+        args.checkpoint = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl'  # Default model
+
+    # Devices
+    args.device = torch.device('cpu')
+    args.disable_cuda = False
+    args.pin_memory = False
+    if torch.cuda.is_available():
+        args.device = torch.device('cuda')
+        args.pin_memory = True
+    args.loader_workers = 8
+
+    # Add visualization defaults
+    args.figure_width = 10
+    args.dpi_factor = 1.0
+
+    if args.net == 'monstereo':
+        args.batch_size = 2
+    else:
+        args.batch_size = 1
+
+    # Make default pifpaf argument
+    args.force_complete_pose = True
+    print("Force complete pose is active")
+
+    # Configure
+    decoder.configure(args)
+    network.configure(args)
+    show.configure(args)
+    visualizer.configure(args)
+
+    return args


 def predict(args):

     cnt = 0
+    args = factory_from_args(args)

     # Load Models
-    pifpaf = PifPaf(args)
-    assert args.mode in ('mono', 'stereo', 'pifpaf')
+    assert args.net in ('monoloco_pp', 'monstereo', 'pifpaf')

-    if 'mono' in args.mode:
-        monoloco = Loco(model=args.model, net='monoloco_pp',
-                        device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    if 'stereo' in args.mode:
-        monstereo = Loco(model=args.model, net='monstereo',
-                         device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
+    if args.net in ('monoloco_pp', 'monstereo'):
+        net = Loco(model=args.model, net=args.net, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)

     # data
-    data = ImageList(args.images, scale=args.scale)
-    if args.mode == 'stereo':
+    processor, model = processor_factory(args)
+    preprocess = preprocess_factory(args)
+
+    # data
+    data = datasets.ImageList(args.images, preprocess=preprocess)
+    if args.net == 'monstereo':
         assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
-        bs = 2
-    else:
-        bs = 1
+
     data_loader = torch.utils.data.DataLoader(
-        data, batch_size=bs, shuffle=False,
-        pin_memory=args.pin_memory, num_workers=args.loader_workers)
+        data, batch_size=args.batch_size, shuffle=False,
+        pin_memory=False, collate_fn=datasets.collate_images_anns_meta)

-    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
-        images = image_tensors.permute(0, 2, 3, 1)
+    # visualizers
+    annotation_painter = openpifpaf.show.AnnotationPainter()

-        processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-        fields_batch = pifpaf.fields(processed_images)
+    for batch_i, (image_tensors_batch, _, meta_batch) in enumerate(data_loader):
+        pred_batch = processor.batch(model, image_tensors_batch, device=args.device)

-        # unbatch stereo pair
-        for ii, (image_path, image, processed_image_cpu, fields) in enumerate(zip(
-                image_paths, images, processed_images_cpu, fields_batch)):
+        # unbatch (only for MonStereo)
+        for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
+            LOG.info('batch %d: %s', batch_i, meta['file_name'])
+            pred = preprocess.annotations_inverse(pred, meta)

             if args.output_directory is None:
-                splits = os.path.split(image_paths[0])
+                splits = os.path.split(meta['file_name'])
                 output_path = os.path.join(splits[0], 'out_' + splits[1])
             else:
-                file_name = os.path.basename(image_paths[0])
+                file_name = os.path.basename(meta['file_name'])
                 output_path = os.path.join(args.output_directory, 'out_' + file_name)
-            print('image', idx, image_path, output_path)
-            keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
+            print('image', batch_i, meta['file_name'], output_path)
+            pifpaf_out = [ann.json_data() for ann in pred]

-            if ii == 0:
-                pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
-                images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor and monoloco original image
+            if idx == 0:
+                pifpaf_outputs = pred  # to only print left image for stereo
                 pifpaf_outs = {'left': pifpaf_out}
-                image_path_l = image_path
+                with open(meta_batch[0]['file_name'], 'rb') as f:
+                    cpu_image = PIL.Image.open(f).convert('RGB')
             else:
                 pifpaf_outs['right'] = pifpaf_out

-            if args.mode in ('stereo', 'mono'):
-                # Extract calibration matrix and ground truth file if present
-                with open(image_path_l, 'rb') as f:
-                    pil_image = Image.open(f).convert('RGB')
-                    images_outputs.append(pil_image)
+        # 3D Predictions
+        if args.net in ('monoloco_pp', 'monstereo'):

-                im_name = os.path.basename(image_path_l)
-                im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale))  # Original
+            im_name = os.path.basename(meta['file_name'])
+            im_size = (cpu_image.size[0], cpu_image.size[1])  # Original
             kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)

             # Preprocess pifpaf outputs and run monoloco
             boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False)

-            if args.mode == 'mono':
+            if args.net == 'monoloco_pp':
                 print("Prediction with MonoLoco++")
-                dic_out = monoloco.forward(keypoints, kk)
-                dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt)
+                dic_out = net.forward(keypoints, kk)
+                dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=not args.social_distance)
+
+                if args.social_distance:
+                    show_social(args, cpu_image, output_path, pifpaf_out, dic_out)

             else:
                 print("Prediction with MonStereo")
                 boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
-                dic_out = monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
-                dic_out = monstereo.post_process(dic_out, boxes, keypoints, kk, dic_gt)
+                dic_out = net.forward(keypoints, kk, keypoints_r=keypoints_r)
+                dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)

         else:
             dic_out = defaultdict(list)
             kk = None

-        factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
+        if not args.social_distance:
+            factory_outputs(args, annotation_painter, cpu_image, output_path, pifpaf_outputs,
+                            dic_out=dic_out, kk=kk)
         print('Image {}\n'.format(cnt) + '-' * 120)
         cnt += 1


-def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
+def factory_outputs(args, annotation_painter, cpu_image, output_path, pred, dic_out=None, kk=None):
     """Output json files or images according to the choice"""

     # Save json file
-    if args.mode == 'pifpaf':
-        keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
+    if args.net == 'pifpaf':
+        with openpifpaf.show.image_canvas(cpu_image, output_path) as ax:
+            annotation_painter.annotations(ax, pred)

-        # Visualizer
-        keypoint_painter = KeypointPainter(show_box=False)
-        skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4)
-
-        if 'json' in args.output_types and keypoint_sets.size > 0:
-            with open(output_path + '.pifpaf.json', 'w') as f:
-                json.dump(pifpaf_out, f)
-
-        if 'keypoints' in args.output_types:
-            with image_canvas(images_outputs[0],
-                              output_path + '.keypoints.png',
-                              show=args.show,
-                              fig_width=args.figure_width,
-                              dpi_factor=args.dpi_factor) as ax:
-                keypoint_painter.keypoints(ax, keypoint_sets)
-
-        if 'skeleton' in args.output_types:
-            with image_canvas(images_outputs[0],
-                              output_path + '.skeleton.png',
-                              show=args.show,
-                              fig_width=args.figure_width,
-                              dpi_factor=args.dpi_factor) as ax:
-                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
-
     else:
         if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
             print(output_path)
             if dic_out['boxes']:  # Only print in case of detections
-                printer = Printer(images_outputs[1], output_path, kk, args)
-                figures, axes = printer.factory_axes()
-                printer.draw(figures, axes, dic_out, images_outputs[1])
+                printer = Printer(cpu_image, output_path, kk, args)
+                figures, axes = printer.factory_axes(dic_out)
+                printer.draw(figures, axes, cpu_image)

         if 'json' in args.output_types:
             with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
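The rewritten `predict.py` follows the openpifpaf 0.11+ interface used above: `processor_factory` returns a processor/model pair, `ImageList` applies a `preprocess` pipeline, and `processor.batch` yields annotation objects whose `json_data()` matches the dictionary format the 3D networks already consume. A condensed sketch of that loop (`args` is assumed to be a namespace already filled by `factory_from_args`; image paths are placeholders):

```
import torch
import openpifpaf.datasets as datasets
from openpifpaf.predict import processor_factory, preprocess_factory

processor, model = processor_factory(args)
preprocess = preprocess_factory(args)

data = datasets.ImageList(['left.png', 'right.png'], preprocess=preprocess)
loader = torch.utils.data.DataLoader(data, batch_size=2, shuffle=False,
                                     collate_fn=datasets.collate_images_anns_meta)

for image_tensors, _, metas in loader:
    preds = processor.batch(model, image_tensors, device=args.device)
    for pred, meta in zip(preds, metas):
        pred = preprocess.annotations_inverse(pred, meta)  # back to original image coordinates
        pifpaf_out = [ann.json_data() for ann in pred]     # same dicts as the old pifpaf output
```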
diff --git a/monstereo/prep/prep_kitti.py b/monstereo/prep/prep_kitti.py
index 8f2e574..dcfe601 100644
--- a/monstereo/prep/prep_kitti.py
+++ b/monstereo/prep/prep_kitti.py
@@ -24,10 +24,9 @@ from .transforms import flip_inputs, flip_labels, height_augmentation
 class PreprocessKitti:
     """Prepare arrays with same format as nuScenes preprocessing but using ground truth txt files"""

-    # AV_W = 0.68
-    # AV_L = 0.75
-    # AV_H = 1.72
-    # WLH_STD = 0.1
+    dir_gt = os.path.join('data', 'kitti', 'gt')
+    dir_images = '/data/lorenzo-data/kitti/original_images/training/image_2'
+    dir_byc_l = '/data/lorenzo-data/kitti/object_detection/left'

     # SOCIAL DISTANCING PARAMETERS
     THRESHOLD_DIST = 2  # Threshold to check distance of people
@@ -51,9 +50,6 @@ class PreprocessKitti:
         self.dir_ann = dir_ann
         self.iou_min = iou_min
         self.monocular = monocular
-        self.dir_gt = os.path.join('data', 'kitti', 'gt')
-        self.dir_images = '/data/lorenzo-data/kitti/original_images/training/image_2'
-        self.dir_byc_l = '/data/lorenzo-data/kitti/object_detection/left'
         self.names_gt = tuple(os.listdir(self.dir_gt))
         self.dir_kk = os.path.join('data', 'kitti', 'calib')
         self.list_gt = glob.glob(self.dir_gt + '/*.txt')
@@ -97,7 +93,9 @@ class PreprocessKitti:
                 category = 'pedestrian'

             # Extract ground truth
-            boxes_gt, ys, _, _ = parse_ground_truth(path_gt, category=category, spherical=True)
+            boxes_gt, ys, _, _ = parse_ground_truth(path_gt,  # pylint: disable=unbalanced-tuple-unpacking
+                                                    category=category,
+                                                    spherical=True)
             cnt_gt[phase] += len(boxes_gt)
             cnt_files += 1
             cnt_files_ped += min(len(boxes_gt), 1)  # if no boxes 0 else 1
@@ -170,7 +168,7 @@ class PreprocessKitti:
                     self.dic_jo[phase]['X'].append(inp)
                     self.dic_jo[phase]['Y'].append(lab)
                     self.dic_jo[phase]['names'].append(name)  # One image name for each annotation
-                    append_cluster(self.dic_jo, phase, inp, lab, keypoint)
+                    append_cluster(self.dic_jo, phase, inp, lab, keypoint.tolist())
                     cnt_mono[phase] += 1
                     cnt_tot += 1
diff --git a/monstereo/prep/preprocess_nu.py b/monstereo/prep/preprocess_nu.py
index b27fa78..4bb8caa 100644
--- a/monstereo/prep/preprocess_nu.py
+++ b/monstereo/prep/preprocess_nu.py
@@ -87,7 +87,7 @@ class PreprocessNuscenes:
         while not current_token == "":
             sample_dic = self.nusc.get('sample', current_token)
             cnt_samples += 1
-
+            # if (cnt_samples % 4 == 0) and (cnt_ann < 3000):
             # Extract all the sample_data tokens for each sample
             for cam in self.CAMERAS:
                 sd_token = sample_dic['data'][cam]
@@ -105,7 +105,7 @@ class PreprocessNuscenes:
                     self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)

                 # Run IoU with pifpaf detections and save
-                path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json')
+                path_pif = os.path.join(self.dir_ann, name + '.predictions.json')
                 exists = os.path.isfile(path_pif)

                 if exists:
@@ -114,7 +114,6 @@ class PreprocessNuscenes:
                         boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
                 else:
                     continue
-
                 if keypoints:
                     matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
                     for (idx, idx_gt) in matches:
@@ -130,7 +129,6 @@ class PreprocessNuscenes:
                             append_cluster(self.dic_jo, phase, inp, lab, keypoint)
                             cnt_ann += 1
                             sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')
-
             current_token = sample_dic['next']

         with open(os.path.join(self.path_joints), 'w') as f:
@@ -139,7 +137,7 @@ class PreprocessNuscenes:
             json.dump(self.dic_names, f)
         end = time.time()

-        extract_box_average(self.dic_jo['train']['boxes_3d'])
+        # extract_box_average(self.dic_jo['train']['boxes_3d'])
         print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
               .format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60))
         print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))
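Both preprocessing scripts now look up `*.predictions.json` instead of `*.pifpaf.json`, which appears to match the suffix newer openpifpaf versions append to the input file name. For example, under this convention:

```
import os

dir_ann = 'data/kitti/annotations'   # hypothetical annotation directory
basename = '000123'
path_ann = os.path.join(dir_ann, basename + '.png.predictions.json')
print(path_ann)                      # data/kitti/annotations/000123.png.predictions.json
```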
diff --git a/monstereo/run.py b/monstereo/run.py
index a2f8cb0..8957c33 100644
--- a/monstereo/run.py
+++ b/monstereo/run.py
@@ -2,8 +2,7 @@

 import argparse

-from openpifpaf.network import nets
-from openpifpaf import decoder
+from openpifpaf import decoder, network, visualizer, show


 def cli():
@@ -37,15 +36,18 @@ def cli():
                                 help='what to output: json keypoints skeleton for Pifpaf'
                                      'json bird front or multi for MonStereo')
     predict_parser.add_argument('--no_save', help='to show images', action='store_true')
-    predict_parser.add_argument('--show', help='to show images', action='store_true')
-    predict_parser.add_argument('--dpi', help='image resolution', type=int, default=100)
+    predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150)
+    predict_parser.add_argument('--long-edge', default=None, type=int,
+                                help='rescale the long side of the image (aspect ratio maintained)')

-    # Pifpaf
-    nets.cli(predict_parser)
-    decoder.cli(predict_parser, force_complete_pose=True, instance_threshold=0.15)
-    predict_parser.add_argument('--scale', default=1.0, type=float, help='change the scale of the image to preprocess')
+    # Pifpaf parsers
+    decoder.cli(predict_parser)
+    network.cli(predict_parser)
+    show.cli(predict_parser)
+    visualizer.cli(predict_parser)

     # Monoloco
+    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
     predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
     predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
     predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
@@ -57,18 +59,15 @@ def cli():
     predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')

     # Social distancing and social interactions
-    predict_parser.add_argument('--social', help='social', action='store_true')
-    predict_parser.add_argument('--activity', help='activity', action='store_true')
-    predict_parser.add_argument('--json_dir', help='for social')
+    predict_parser.add_argument('--social_distance', help='social', action='store_true')
     predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
-    predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2)
-    predict_parser.add_argument('--margin', type=float, help='conservative for noise in orientation', default=1.5)
-    predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.25, 1, 2))
+    predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2.5)
+    predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.3, 0.5, 1))

     # Training
     training_parser.add_argument('--joints', help='Json file with input joints',
                                  default='data/arrays/joints-nuscenes_teaser-190513-1846.json')
-    training_parser.add_argument('--save', help='whether to not save model and log file', action='store_true')
+    training_parser.add_argument('--no_save', help='to not save model and log file', action='store_true')
     training_parser.add_argument('-e', '--epochs', type=int, help='number of epochs to train for', default=500)
     training_parser.add_argument('--bs', type=int, default=512, help='input batch size')
     training_parser.add_argument('--monocular', help='whether to train monoloco', action='store_true')
@@ -81,7 +80,9 @@ def cli():
     training_parser.add_argument('--hyp', help='run hyperparameters tuning', action='store_true')
     training_parser.add_argument('--multiplier', type=int, help='Size of the grid of hyp search', default=1)
     training_parser.add_argument('--r_seed', type=int, help='specify the seed for training and hyp tuning', default=1)
-    training_parser.add_argument('--activity', help='new', action='store_true')
+    training_parser.add_argument('--print_loss', help='print training and validation losses', action='store_true')
+    training_parser.add_argument('--auto_tune_mtl', help='whether to use uncertainty to autotune losses',
+                                 action='store_true')

     # Evaluation
     eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti')
@@ -102,6 +103,9 @@ def cli():
     eval_parser.add_argument('--variance', help='evaluate keypoints variance', action='store_true')
     eval_parser.add_argument('--activity', help='evaluate activities', action='store_true')
     eval_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
+    eval_parser.add_argument('--baselines', help='whether to evaluate stereo baselines', action='store_true')
+    eval_parser.add_argument('--generate_official', help='whether to add empty txt files for official evaluation',
+                             action='store_true')

     args = parser.parse_args()
     return args
@@ -110,10 +114,7 @@ def cli():
 def main():
     args = cli()
     if args.command == 'predict':
-        if args.activity:
-            from .activity import predict
-        else:
-            from .predict import predict
+        from .predict import predict
         predict(args)

     elif args.command == 'prep':
@@ -135,14 +136,11 @@ def main():
             hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs, monocular=args.monocular,
                                    dropout=args.dropout, multiplier=args.multiplier, r_seed=args.r_seed)
-            hyp_tuning.train()
+            hyp_tuning.train(args)
         else:
             from .train import Trainer
-            training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs,
-                               monocular=args.monocular, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step,
-                               n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size,
-                               r_seed=args.r_seed, save=args.save)
+            training = Trainer(args)

             _ = training.train()
             _ = training.evaluate()
@@ -169,19 +167,18 @@ def main():
     else:
         if args.generate:
             from .eval.generate_kitti import GenerateKitti
-            kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout,
-                                      hidden_size=args.hidden_size)
+            kitti_txt = GenerateKitti(args)
             kitti_txt.run()

         if args.dataset == 'kitti':
             from .eval import EvalKitti
-            kitti_eval = EvalKitti(verbose=args.verbose)
+            kitti_eval = EvalKitti(args)
             kitti_eval.run()
-            kitti_eval.printer(show=args.show, save=args.save)
+            kitti_eval.printer()

         elif 'nuscenes' in args.dataset:
             from .train import Trainer
-            training = Trainer(joints=args.joints, hidden_size=args.hidden_size)
+            training = Trainer(args)
             _ = training.evaluate(load=True, model=args.model, debug=False)

         else:
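With the reworked parser, the network is chosen at prediction time via `--net`. A plausible invocation (paths are placeholders, and the image arguments follow the predict subparser's existing interface, which is not shown in this hunk):

```
python3 -m monstereo.run predict --net monoloco_pp \
    --model data/models/monoloco_pp.pkl \
    --output_types multi \
    docs/002282.png
```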
diff --git a/monstereo/train/hyp_tuning.py b/monstereo/train/hyp_tuning.py
index 7fa1bf5..0237260 100644
--- a/monstereo/train/hyp_tuning.py
+++ b/monstereo/train/hyp_tuning.py
@@ -61,7 +61,7 @@ class HypTuning:
         # plt.hist(self.lr_list, bins=50)
         # plt.show()

-    def train(self):
+    def train(self, args):
         """Train multiple times using log-space random search"""

         best_acc_val = 20
@@ -76,10 +76,7 @@ class HypTuning:
             hidden_size = self.hidden_list[idx]
             n_stage = self.n_stage_list[idx]

-            training = Trainer(joints=self.joints, epochs=self.num_epochs,
-                               bs=bs, monocular=self.monocular, dropout=self.dropout, lr=lr, sched_step=sched_step,
-                               sched_gamma=sched_gamma, hidden_size=hidden_size, n_stage=n_stage,
-                               save=False, print_loss=False, r_seed=self.r_seed)
+            training = Trainer(args)

             best_epoch = training.train()
             dic_err, model = training.evaluate()
diff --git a/monstereo/train/losses.py b/monstereo/train/losses.py
index 3abe5dc..7e042dd 100644
--- a/monstereo/train/losses.py
+++ b/monstereo/train/losses.py
@@ -27,7 +27,7 @@ class AutoTuneMultiTaskLoss(torch.nn.Module):
         loss_values = [lam * l(o, g) / (2.0 * (log_sigma.exp() ** 2))
                        for lam, log_sigma, l, o, g in zip(self.lambdas, self.log_sigmas, self.losses, out, gt_out)]
-        auto_reg = [log_sigma for log_sigma in self.log_sigmas]
+        auto_reg = [log_sigma for log_sigma in self.log_sigmas]  # pylint: disable=unnecessary-comprehension
         loss = sum(loss_values) + sum(auto_reg)

         if phase == 'val':
@@ -70,7 +70,7 @@ class MultiTaskLoss(torch.nn.Module):
 class CompositeLoss(torch.nn.Module):

     def __init__(self, tasks):
-        super(CompositeLoss, self).__init__()
+        super().__init__()

         self.tasks = tasks
         self.multi_loss_tr = {task: (LaplacianLoss() if task == 'd'
@@ -98,7 +98,7 @@ class CompositeLoss(torch.nn.Module):
 class LaplacianLoss(torch.nn.Module):
     """1D Gaussian with std depending on the absolute distance"""
     def __init__(self, size_average=True, reduce=True, evaluate=False):
-        super(LaplacianLoss, self).__init__()
+        super().__init__()
         self.size_average = size_average
         self.reduce = reduce
         self.evaluate = evaluate
@@ -140,7 +140,7 @@ class GaussianLoss(torch.nn.Module):
     """1D Gaussian with std depending on the absolute distance
     """
     def __init__(self, device, size_average=True, reduce=True, evaluate=False):
-        super(GaussianLoss, self).__init__()
+        super().__init__()
         self.size_average = size_average
         self.reduce = reduce
         self.evaluate = evaluate
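The new `--auto_tune_mtl` flag feeds `AutoTuneMultiTaskLoss` above, which follows the homoscedastic uncertainty weighting of Kendall et al.: each task loss is divided by 2σ² and a log σ term keeps the learned uncertainties from growing without bound. A self-contained sketch of the same weighting:

```
import torch

log_sigmas = torch.nn.Parameter(torch.zeros(2))  # one learnable log-sigma per task
task_losses = torch.tensor([1.2, 0.4])           # hypothetical distance/orientation terms

weighted = task_losses / (2.0 * log_sigmas.exp() ** 2)
loss = weighted.sum() + log_sigmas.sum()         # regularizer penalizes inflating sigma
print(float(loss))                               # 0.8 at initialization (sigma = 1)
```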
diff --git a/monstereo/train/trainer.py b/monstereo/train/trainer.py
index 2990921..cdbb632 100644
--- a/monstereo/train/trainer.py
+++ b/monstereo/train/trainer.py
@@ -34,10 +34,9 @@ class Trainer:
     tasks = ('d', 'x', 'y', 'h', 'w', 'l', 'ori', 'aux')
     val_task = 'd'
     lambdas = (1, 1, 1, 1, 1, 1, 1, 1)
+    clusters = ['10', '20', '30', '40']

-    def __init__(self, joints, epochs=100, bs=256, dropout=0.2, lr=0.002,
-                 sched_step=20, sched_gamma=1, hidden_size=256, n_stage=3, r_seed=1, n_samples=100,
-                 monocular=False, save=False, print_loss=True):
+    def __init__(self, args):
         """
         Initialize directories, load the data and parameters for the training
         """
@@ -49,31 +48,29 @@ class Trainer:
         dir_logs = os.path.join('data', 'logs')
         if not os.path.exists(dir_logs):
             warnings.warn("Warning: default logs directory not found")
-        assert os.path.exists(joints), "Input file not found"
+        assert os.path.exists(args.joints), "Input file not found"

-        self.joints = joints
-        self.num_epochs = epochs
-        self.save = save
-        self.print_loss = print_loss
-        self.monocular = monocular
-        self.lr = lr
-        self.sched_step = sched_step
-        self.sched_gamma = sched_gamma
-        self.clusters = ['10', '20', '30', '50', '>50']
-        self.hidden_size = hidden_size
-        self.n_stage = n_stage
+        self.joints = args.joints
+        self.num_epochs = args.epochs
+        self.no_save = args.no_save
+        self.print_loss = args.print_loss
+        self.monocular = args.monocular
+        self.lr = args.lr
+        self.sched_step = args.sched_step
+        self.sched_gamma = args.sched_gamma
+        self.hidden_size = args.hidden_size
+        self.n_stage = args.n_stage
         self.dir_out = dir_out
-        self.n_samples = n_samples
-        self.r_seed = r_seed
-        self.auto_tune_mtl = False
+        self.r_seed = args.r_seed
+        self.auto_tune_mtl = args.auto_tune_mtl

         # Select the device
         use_cuda = torch.cuda.is_available()
         self.device = torch.device("cuda" if use_cuda else "cpu")
         print('Device: ', self.device)
-        torch.manual_seed(r_seed)
+        torch.manual_seed(self.r_seed)
         if use_cuda:
-            torch.cuda.manual_seed(r_seed)
+            torch.cuda.manual_seed(self.r_seed)

         # Remove auxiliary task if monocular
         if self.monocular and self.tasks[-1] == 'aux':
@@ -95,25 +92,28 @@ class Trainer:
             input_size = 34
             output_size = 9
+        name = 'monoloco_pp' if self.monocular else 'monstereo'
         now = datetime.datetime.now()
         now_time = now.strftime("%Y%m%d-%H%M")[2:]
-        name_out = 'monstereo-' + now_time
-        if self.save:
+        name_out = name + '-' + now_time
+        if not self.no_save:
             self.path_model = os.path.join(dir_out, name_out + '.pkl')
             self.logger = set_logger(os.path.join(dir_logs, name_out))
             self.logger.info("Training arguments: \nepochs: {} \nbatch_size: {} \ndropout: {}"
                              "\nmonocular: {} \nlearning rate: {} \nscheduler step: {} \nscheduler gamma: {} "
                              "\ninput_size: {} \noutput_size: {}\nhidden_size: {} \nn_stages: {} "
                              "\nr_seed: {} \nlambdas: {} \ninput_file: {}"
-                             .format(epochs, bs, dropout, self.monocular, lr, sched_step, sched_gamma, input_size,
-                                     output_size, hidden_size, n_stage, r_seed, self.lambdas, self.joints))
+                             .format(args.epochs, args.bs, args.dropout, self.monocular,
+                                     args.lr, args.sched_step, args.sched_gamma, input_size,
+                                     output_size, args.hidden_size, args.n_stage, args.r_seed,
+                                     self.lambdas, self.joints))
         else:
             logging.basicConfig(level=logging.INFO)
             self.logger = logging.getLogger(__name__)

         # Dataloader
         self.dataloaders = {phase: DataLoader(KeypointsDataset(self.joints, phase=phase),
-                                              batch_size=bs, shuffle=True) for phase in ['train', 'val']}
+                                              batch_size=args.bs, shuffle=True) for phase in ['train', 'val']}

         self.dataset_sizes = {phase: len(KeypointsDataset(self.joints, phase=phase))
                               for phase in ['train', 'val']}
@@ -122,15 +122,16 @@
         self.logger.info('Sizes of the dataset: {}'.format(self.dataset_sizes))
         print(">>> creating model")
-        self.model = MonStereoModel(input_size=input_size, output_size=output_size, linear_size=hidden_size,
-                                    p_dropout=dropout, num_stage=self.n_stage, device=self.device)
+        self.model = MonStereoModel(input_size=input_size, output_size=output_size, linear_size=args.hidden_size,
+                                    p_dropout=args.dropout, num_stage=self.n_stage, device=self.device)
         self.model.to(self.device)
         print(">>> model params: {:.3f}M".format(sum(p.numel() for p in self.model.parameters()) / 1000000.0))
         print(">>> loss params: {}".format(sum(p.numel() for p in self.mt_loss.parameters())))

         # Optimizer and scheduler
         all_params = chain(self.model.parameters(), self.mt_loss.parameters())
-        self.optimizer = torch.optim.Adam(params=all_params, lr=lr)
+        self.optimizer = torch.optim.Adam(params=all_params, lr=args.lr)
+        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')
         self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=self.sched_step, gamma=self.sched_gamma)

     def train(self):
@@ -155,11 +156,11 @@ class Trainer:
                     labels = labels.to(self.device)
                     with torch.set_grad_enabled(phase == 'train'):
                         if phase == 'train':
+                            self.optimizer.zero_grad()
                             outputs = self.model(inputs)
                             loss, loss_values = self.mt_loss(outputs, labels, phase=phase)
-                            self.optimizer.zero_grad()
                             loss.backward()
-                            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 2)
+                            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 3)
                             self.optimizer.step()
                             self.scheduler.step()
@@ -242,7 +243,7 @@ class Trainer:
                 self.cout_stats(dic_err['val'], size_eval, clst=clst)

             # Save the model and the results
-        if self.save and not load:
+        if not (self.no_save or load):
             torch.save(self.model.state_dict(), self.path_model)
             print('-' * 120)
             self.logger.info("\nmodel saved: {} \n".format(self.path_model))
@@ -264,7 +265,6 @@ class Trainer:

         # Distance
         errs = torch.abs(extract_outputs(outputs)['d'] - extract_labels(labels)['d'])
-        assert rel_frac > 0.99, "Variance of errors not supported with partial evaluation"

         # Uncertainty
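Two small but meaningful changes in the loop above: `optimizer.zero_grad()` now runs before the forward pass, so gradients are cleared before any new graph is built, and the gradient-norm clip rises from 2 to 3. The resulting ordering, as a generic PyTorch sketch (model, data and criterion are placeholders):

```
import torch

for inputs, labels in dataloader:
    optimizer.zero_grad()                                   # clear stale gradients first
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 3)   # cap global grad norm at 3
    optimizer.step()
```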
diff --git a/monstereo/utils/iou.py b/monstereo/utils/iou.py
index 24ad252..cc5d813 100644
--- a/monstereo/utils/iou.py
+++ b/monstereo/utils/iou.py
@@ -57,7 +57,7 @@ def get_iou_matches(boxes, boxes_gt, iou_min=0.3):
             ious.append(iou)
         idx_gt_max = int(np.argmax(ious))
         if (ious[idx_gt_max] >= iou_min) and (idx_gt_max not in used):
-            matches.append((idx, idx_gt_max))
+            matches.append((int(idx), idx_gt_max))
             used.append(idx_gt_max)
     return matches
@@ -93,6 +93,6 @@ def reorder_matches(matches, boxes, mode='left_rigth'):

     # Order the boxes based on the left-right position in the image and
     ordered_boxes = np.argsort([box[0] for box in boxes])  # indices of boxes ordered from left to right
-    matches_left = [idx for (idx, _) in matches]
+    matches_left = [int(idx) for (idx, _) in matches]

     return [matches[matches_left.index(idx_boxes)] for idx_boxes in ordered_boxes if idx_boxes in matches_left]
diff --git a/monstereo/utils/kitti.py b/monstereo/utils/kitti.py
index d1e7ee2..6fc39c8 100644
--- a/monstereo/utils/kitti.py
+++ b/monstereo/utils/kitti.py
@@ -199,11 +199,11 @@ def factory_file(path_calib, dir_ann, basename, mode='left'):

     if mode == 'left':
         kk, tt = p_left[:]
-        path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json')
+        path_ann = os.path.join(dir_ann, basename + '.png.predictions.json')

     else:
         kk, tt = p_right[:]
-        path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json')
+        path_ann = os.path.join(dir_ann + '_right', basename + '.png.predictions.json')

     from ..utils import open_annotations
     annotations = open_annotations(path_ann)
diff --git a/monstereo/utils/misc.py b/monstereo/utils/misc.py
index c3ab6eb..2f0e583 100644
--- a/monstereo/utils/misc.py
+++ b/monstereo/utils/misc.py
@@ -20,14 +20,14 @@ def append_cluster(dic_jo, phase, xx, ys, kps):
         dic_jo[phase]['clst']['30']['kps'].append(kps)
         dic_jo[phase]['clst']['30']['X'].append(xx)
         dic_jo[phase]['clst']['30']['Y'].append(ys)
-    elif ys[3] < 50:
-        dic_jo[phase]['clst']['50']['kps'].append(kps)
-        dic_jo[phase]['clst']['50']['X'].append(xx)
-        dic_jo[phase]['clst']['50']['Y'].append(ys)
+    elif ys[3] <= 40:
+        dic_jo[phase]['clst']['40']['kps'].append(kps)
+        dic_jo[phase]['clst']['40']['X'].append(xx)
+        dic_jo[phase]['clst']['40']['Y'].append(ys)
     else:
-        dic_jo[phase]['clst']['>50']['kps'].append(kps)
-        dic_jo[phase]['clst']['>50']['X'].append(xx)
-        dic_jo[phase]['clst']['>50']['Y'].append(ys)
+        dic_jo[phase]['clst']['>40']['kps'].append(kps)
+        dic_jo[phase]['clst']['>40']['X'].append(xx)
+        dic_jo[phase]['clst']['>40']['Y'].append(ys)


 def get_task_error(dd):
@@ -58,7 +58,7 @@ def make_new_directory(dir_out):
     if os.path.exists(dir_out):
         shutil.rmtree(dir_out)
     os.makedirs(dir_out)
-    print("Created empty output directory for {} txt files".format(dir_out))
+    print("Created empty output directory {} ".format(dir_out))


 def normalize_hwl(lab):
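`append_cluster` buckets each annotation by its ground-truth distance (`ys[3]`); the bins now stop at 40 m, consistent with the new `Trainer.clusters = ['10', '20', '30', '40']`. A compact near-equivalent of the branching above (boundary handling simplified):

```
def cluster_key(dd, edges=(10, 20, 30, 40)):
    """Map a distance in metres to its cluster label, e.g. 12.3 -> '20'."""
    for edge in edges:
        if dd <= edge:
            return str(edge)
    return '>40'

assert cluster_key(12.3) == '20'
assert cluster_key(45.0) == '>40'
```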
{}".format(path_fig)) + print("Figure of results " + net + " saved in {}".format(path_fig)) if show: plt.show() plt.close('all') -def show_spread(dic_stats, clusters, dir_out='data/figures', show=False, save=False): +def show_spread(dic_stats, clusters, net, dir_fig, show=False, save=False): """Predicted confidence intervals and task error as a function of ground-truth distance""" + assert net in ('monoloco_pp', 'monstereo'), "network not recognized" phase = 'test' - excl_clusters = ['all', 'easy', 'moderate', 'hard'] + excl_clusters = ['all', 'easy', 'moderate', 'hard', '49'] clusters = [clst for clst in clusters if clst not in excl_clusters] x_min = 3 - x_max = 42 + x_max = 31 y_min = 0 - for method in ('monoloco_pp', 'monstereo'): - plt.figure(2, figsize=FIGSIZE) - xxs = get_distances(clusters) - bbs = np.array([dic_stats[phase][method][key]['std_ale'] for key in clusters[:-1]]) - if method == 'monoloco_pp': - y_max = 5 - color = 'deepskyblue' - epis = np.array([dic_stats[phase][method][key]['std_epi'] for key in clusters[:-1]]) - plt.plot(xxs, epis, marker='o', color='coral', label="Combined uncertainty (\u03C3)") - else: - y_max = 3.5 - color = 'b' - plt.plot(xx, get_pixel_error(xx), linewidth=2.5, color='k', label='Pixel error') - plt.plot(xxs, bbs, marker='s', color=color, label="Aleatoric uncertainty (b)", linewidth=4, markersize=8) - xx = np.linspace(x_min, x_max, 100) - plt.plot(xx, get_task_error(xx), '--', label="Task error (monocular bound)", color='lightgreen', linewidth=4) + plt.figure(2, figsize=FIGSIZE) + xxs = get_distances(clusters) + bbs = np.array([dic_stats[phase][net][key]['std_ale'] for key in clusters[:-1]]) + xx = np.linspace(x_min, x_max, 100) + if net == 'monoloco_pp': + y_max = 2.7 + color = 'deepskyblue' + epis = np.array([dic_stats[phase][net][key]['std_epi'] for key in clusters[:-1]]) + plt.plot(xxs, epis, marker='o', color='coral', linewidth=4, markersize=8, label="Combined uncertainty (\u03C3)") + else: + y_max = 3.5 + color = 'b' + plt.plot(xx, get_pixel_error(xx), linewidth=2.5, color='k', label='Pixel error') + plt.plot(xxs, bbs, marker='s', color=color, label="Aleatoric uncertainty (b)", linewidth=4, markersize=8) + plt.plot(xx, get_task_error(xx), '--', label="Task error (monocular bound)", color='lightgreen', linewidth=4) - plt.xlabel("Ground-truth distance [m]", fontsize=FONTSIZE) - plt.ylabel("Uncertainty [m]", fontsize=FONTSIZE) - plt.xlim(x_min, x_max) - plt.ylim(y_min, y_max) - plt.grid(linewidth=GRID_WIDTH) - plt.legend(prop={'size': FONTSIZE}) - plt.xticks(fontsize=FONTSIZE) - plt.yticks(fontsize=FONTSIZE) - if save: - plt.tight_layout() - path_fig = os.path.join(dir_out, 'spread_' + method + '.png') - plt.savefig(path_fig, dpi=DPI) - print("Figure of confidence intervals saved in {}".format(path_fig)) - if show: - plt.show() - plt.close('all') + plt.xlabel("Ground-truth distance [m]", fontsize=FONTSIZE) + plt.ylabel("Uncertainty [m]", fontsize=FONTSIZE) + plt.xlim(x_min, x_max) + plt.ylim(y_min, y_max) + plt.grid(linewidth=GRID_WIDTH) + plt.legend(prop={'size': FONTSIZE}) + plt.xticks(fontsize=FONTSIZE) + plt.yticks(fontsize=FONTSIZE) + + if save: + plt.tight_layout() + path_fig = os.path.join(dir_fig, 'spread_' + net + '.png') + plt.savefig(path_fig, dpi=DPI) + print("Figure of confidence intervals saved in {}".format(path_fig)) + if show: + plt.show() + plt.close('all') -def show_task_error(show, save, dir_out='data/figures'): +def show_task_error(dir_fig, show, save): """Task error figure""" plt.figure(3, figsize=FIGSIZE) xx = 
-def show_task_error(show, save, dir_out='data/figures'):
+def show_task_error(dir_fig, show, save):
     """Task error figure"""
     plt.figure(3, figsize=FIGSIZE)
     xx = np.linspace(0.1, 50, 100)
@@ -147,7 +148,7 @@
     plt.xticks(fontsize=FONTSIZE)
     plt.yticks(fontsize=FONTSIZE)
     if save:
-        path_fig = os.path.join(dir_out, 'task_error.png')
+        path_fig = os.path.join(dir_fig, 'task_error.png')
         plt.savefig(path_fig, dpi=DPI)
         print("Figure of task error saved in {}".format(path_fig))
     if show:
@@ -181,7 +182,7 @@ def show_method(save, dir_out='data/figures'):
     plt.close('all')


-def show_box_plot(dic_errors, clusters, dir_out='data/figures', show=False, save=False):
+def show_box_plot(dic_errors, clusters, dir_fig, show=False, save=False):
     import pandas as pd
     excl_clusters = ['all', 'easy', 'moderate', 'hard']
     clusters = [int(clst) for clst in clusters if clst not in excl_clusters]
@@ -205,7 +206,7 @@
         plt.ylim(y_min, y_max)

     if save:
-        path_fig = os.path.join(dir_out, 'box_plot_' + name + '.png')
+        path_fig = os.path.join(dir_fig, 'box_plot_' + name + '.png')
         plt.tight_layout()
         plt.savefig(path_fig, dpi=DPI)
         print("Figure of box plot saved in {}".format(path_fig))
@@ -300,8 +301,8 @@ def get_percentile(dist_gmm):
     # mad_d = np.mean(np.abs(dist_d - mu_d))


-def printing_styles(stereo):
-    if stereo:
+def printing_styles(net):
+    if net == 'monstereo':
         style = {"labels": ['3DOP', 'PSF', 'MonoLoco', 'MonoPSR', 'Pseudo-Lidar', 'Our MonStereo'],
                  "methods": ['3dop', 'psf', 'monoloco', 'monopsr', 'pseudo-lidar', 'monstereo'],
                  "mks": ['s', 'p', 'o', 'v', '*', '^'],
@@ -309,11 +310,12 @@
                  "colors": ['gold', 'skyblue', 'darkgreen', 'pink', 'darkorange', 'b'],
                  "lstyles": ['solid', 'solid', 'dashed', 'dashed', 'solid', 'solid']}
     else:
-        style = {"labels": ['Mono3D', 'Geometric Baseline', 'MonoPSR', '3DOP (stereo)', 'MonoLoco', 'Monoloco++'],
-                 "methods": ['m3d', 'geometric', 'monopsr', '3dop', 'monoloco', 'monoloco_pp'],
+        style = {"labels": ['Geometric Baseline', 'MonoPSR', 'MonoDIS', '3DOP (stereo)',
+                            'MonoLoco', 'Monoloco++'],
+                 "methods": ['geometric', 'monopsr', 'monodis', '3dop', 'monoloco', 'monoloco_pp'],
                  "mks": ['*', '^', 'p', '.', 's', 'o', 'o'],
                  "mksizes": [6, 6, 6, 6, 6, 6], "lws": [1.5, 1.5, 1.5, 1.5, 1.5, 2.2],
-                 "colors": ['r', 'purple', 'olive', 'darkorange', 'b', 'darkblue'],
+                 "colors": ['purple', 'olive', 'r', 'darkorange', 'b', 'darkblue'],
                  "lstyles": ['solid', 'solid', 'solid', 'dashdot', 'solid', 'solid', ]}

     return style
diff --git a/monstereo/visuals/pifpaf_show.py b/monstereo/visuals/pifpaf_show.py
index 7a00736..fc7811e 100644
--- a/monstereo/visuals/pifpaf_show.py
+++ b/monstereo/visuals/pifpaf_show.py
@@ -1,3 +1,6 @@
+
+# File adapted from https://github.com/vita-epfl/openpifpaf
+
 from contextlib import contextmanager

 import numpy as np
@@ -39,21 +42,20 @@ def canvas(fig_file=None, show=True, **kwargs):
 @contextmanager
 def image_canvas(image, fig_file=None, show=True, dpi_factor=1.0, fig_width=10.0, **kwargs):
     if 'figsize' not in kwargs:
-        kwargs['figsize'] = (fig_width, fig_width * image.shape[0] / image.shape[1])
+        kwargs['figsize'] = (fig_width, fig_width * image.size[1] / image.size[0])

     fig = plt.figure(**kwargs)
     ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
     ax.set_axis_off()
-    ax.set_xlim(0, image.shape[1])
-    ax.set_ylim(image.shape[0], 0)
+    ax.set_xlim(0, image.size[0])
+    ax.set_ylim(image.size[1], 0)
     fig.add_axes(ax)
     image_2 = ndimage.gaussian_filter(image, sigma=2.5)
     ax.imshow(image_2, alpha=0.4)
-
     yield ax

     if fig_file:
-        fig.savefig(fig_file, dpi=image.shape[1] / kwargs['figsize'][0] * dpi_factor)
+        fig.savefig(fig_file, dpi=image.size[0] / kwargs['figsize'][0] * dpi_factor)
         print('keypoints image saved')
     if show:
         plt.show()
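`image_canvas` now receives a PIL image rather than a numpy array, hence the switch from `shape` (rows, columns) to `size` (width, height); the two conventions are transposed. A quick reminder:

```
import numpy as np
from PIL import Image

im = Image.new('RGB', (640, 480))   # PIL: size == (width, height)
arr = np.asarray(im)                # numpy: shape == (height, width, channels)

assert im.size == (640, 480)
assert arr.shape[:2] == (480, 640)  # so size[0] replaces shape[1], size[1] replaces shape[0]
```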
diff --git a/monstereo/visuals/printer.py b/monstereo/visuals/printer.py
index bf3e2aa..7ed700c 100644
--- a/monstereo/visuals/printer.py
+++ b/monstereo/visuals/printer.py
@@ -28,7 +28,7 @@ def image_attributes(dpi, output_types):
                 fontsize_num=round(22 * c),
                 fontsize_ax=round(16 * c),
                 linewidth=round(8 * c),
-                markersize=round(16 * c),
+                markersize=round(13 * c),
                 y_box_margin=round(24 * math.sqrt(c)),
                 stereo=dict(color='deepskyblue',
                             numcolor='darkorange',
@@ -58,7 +58,7 @@ class Printer:
         self.output_path = output_path
         self.kk = kk
         self.output_types = args.output_types
-        self.z_max = args.z_max  # To include ellipses in the image
+        self.z_max = args.z_max  # set max distance to show instances
         self.show_all = args.show_all
         self.show = args.show_all
         self.save = not args.no_save
@@ -74,26 +74,41 @@ class Printer:
         self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']]
         self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']]

+        # Set maximum distance
+        self.dd_pred = dic_ann['dds_pred']
+        self.dd_real = dic_ann['dds_real']
+        self.z_max = int(min(self.z_max, 4 + max(max(self.dd_pred), max(self.dd_real, default=0))))
+
         # Do not print instances outside z_max
         self.zz_gt = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0
                       for idx, xx in enumerate(dic_ann['xyz_real'])]
         self.zz_pred = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0
                         for idx, xx in enumerate(dic_ann['xyz_pred'])]
-        self.dd_pred = dic_ann['dds_pred']
-        self.dd_real = dic_ann['dds_real']
+
         self.uv_heads = dic_ann['uv_heads']
         self.uv_shoulders = dic_ann['uv_shoulders']
         self.boxes = dic_ann['boxes']
         self.boxes_gt = dic_ann['boxes_gt']
         self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1])
-        if dic_ann['aux']:
-            self.auxs = dic_ann['aux'] if dic_ann['aux'] else None
+        self.auxs = dic_ann['aux']
+        if len(self.auxs) == 0:
+            self.modes = ['mono'] * len(self.dd_pred)
+        else:
+            self.modes = []
+            for aux in self.auxs:
+                if aux <= 0.3:
+                    self.modes.append('mono')
+                else:
+                    self.modes.append('stereo')

-    def factory_axes(self):
+    def factory_axes(self, dic_out):
         """Create axes for figures: front bird multi"""
         axes = []
         figures = []

+        # Process the annotation dictionary of monoloco
+        self._process_results(dic_out)
+
         # Initialize multi figure, resizing it for aesthetic proportion
         if 'multi' in self.output_types:
             assert 'bird' and 'front' not in self.output_types, \
@@ -150,10 +165,7 @@ class Printer:
             axes.append(ax1)
         return figures, axes

-    def draw(self, figures, axes, dic_out, image):
-
-        # Process the annotation dictionary of monoloco
-        self._process_results(dic_out)
+    def draw(self, figures, axes, image):

         # whether to include instances that don't match the ground-truth
         iterator = range(len(self.zz_pred)) if self.show_all else range(len(self.zz_gt))
@@ -163,9 +175,9 @@ class Printer:

         # Draw the front figure
         number = dict(flag=False, num=97)
-        if 'multi' in self.output_types:
+        if any(xx in self.output_types for xx in ['front', 'multi']):
             number['flag'] = True  # add numbers
-            self.mpl_im0.set_data(image)
+        self.mpl_im0.set_data(image)
         for idx in iterator:
             if any(xx in self.output_types for xx in ['front', 'multi']) and self.zz_pred[idx] > 0:
                 self._draw_front(axes[0],
@@ -199,8 +211,6 @@ class Printer:

     def _draw_front(self, ax, z, idx, number):

-        mode = 'stereo' if self.auxs[idx] > 0.3 else 'mono'
-
         # Bbox
         w = min(self.width-2, self.boxes[idx][2] - self.boxes[idx][0])
         h = min(self.height-2, (self.boxes[idx][3] - self.boxes[idx][1]) * self.y_scale)
@@ -211,12 +221,12 @@ class Printer:
             width=w,
             height=h,
             fill=False,
-            color=self.attr[mode]['color'],
-            linewidth=self.attr[mode]['linewidth'])
+            color=self.attr[self.modes[idx]]['color'],
+            linewidth=self.attr[self.modes[idx]]['linewidth'])
         ax.add_patch(rectangle)
         z_str = str(z).split(sep='.')
         text = z_str[0] + '.' + z_str[1][0]
-        bbox_config = {'facecolor': self.attr[mode]['color'], 'alpha': 0.4, 'linewidth': 0}
+        bbox_config = {'facecolor': self.attr[self.modes[idx]]['color'], 'alpha': 0.4, 'linewidth': 0}
         x_t = x0 - 1.5
         y_t = y1 + self.attr['y_box_margin']
@@ -236,12 +246,12 @@ class Printer:
                     y1 + 14,
                     chr(number['num']),
                     fontsize=self.attr['fontsize_num'],
-                    color=self.attr[mode]['numcolor'],
+                    color=self.attr[self.modes[idx]]['numcolor'],
                     weight='bold')

     def _draw_text_bird(self, axes, idx, num):
         """Plot the number in the bird eye view map"""
-        mode = 'stereo' if self.auxs[idx] > 0.3 else 'mono'
+
         std = self.stds_epi[idx] if self.stds_epi[idx] > 0 else self.stds_ale[idx]
         theta = math.atan2(self.zz_pred[idx], self.xx_pred[idx])
@@ -250,7 +260,7 @@ class Printer:
         axes[1].text(self.xx_pred[idx] + delta_x + 0.2,
                      self.zz_pred[idx] + delta_z + 0/2,
                      chr(num),
                      fontsize=self.attr['fontsize_bv'],
-                     color=self.attr[mode]['numcolor'])
+                     color=self.attr[self.modes[idx]]['numcolor'])

     def _draw_uncertainty(self, axes, idx):
diff --git a/setup.py b/setup.py
index 6771aa4..6d974de 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,8 @@ setup(
         'monstereo.utils'
     ],
     license='GNU AGPLv3',
-    description='MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization',
+    description=' Perceiving Humans: from Monocular 3D Localization to Social Distancing '
+                '/ MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization',
     long_description=open('README.md').read(),
     long_description_content_type='text/markdown',
     author='Lorenzo Bertoni',
@@ -27,9 +28,7 @@
     zip_safe=False,
     install_requires=[
-        'openpifpaf==0.8.0',
-        'torch==1.1.0',
-        'torchvision==0.3.0'
+        'openpifpaf>=0.11'
     ],
     extras_require={
         'eval': [