Merge pull request #3 from vita-epfl/update
OpenPifPaf 0.12 and cleaning
@ -9,7 +9,7 @@ Good-names=xx,dd,zz,hh,ww,pp,kk,lr,w1,w2,w3,mm,im,uv,ax,COV_MIN,CONF_MIN
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
disable=import-error,invalid-name,unused-variable,fixme,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation
|
||||
disable=import-error,invalid-name,unused-variable,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation,import-outside-toplevel
|
||||
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
|
||||
4
LICENSE
@ -1,4 +1,4 @@
|
||||
Copyright 2020 by EPFL/VITA. All rights reserved.
|
||||
Copyright 2020-2021 by EPFL/VITA. All rights reserved.
|
||||
|
||||
This project and all its files are licensed under
|
||||
GNU AGPLv3 or later version.
|
||||
@ -6,4 +6,4 @@ GNU AGPLv3 or later version.
|
||||
If this license is not suitable for your business or project
|
||||
please contact EPFL-TTO (https://tto.epfl.ch/) for a full commercial license.
|
||||
|
||||
This software may not be used to harm any person deliberately.
|
||||
This software may not be used to harm any person deliberately or for any military application.
|
||||
|
||||
66
README.md
@ -1,19 +1,61 @@
|
||||
# Perceiving Humans in 3D
|
||||
|
||||
This repository contains the code for three research projects:
|
||||
|
||||
1. **MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization**
|
||||
[README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonStereo.md) & [Article](https://arxiv.org/abs/2008.10913)
|
||||
|
||||

|
||||
|
||||
2. **Perceiving Humans: from Monocular 3D Localization to Social Distancing**
|
||||
This repository contains the code for two research projects:
|
||||
|
||||
1. **Perceiving Humans: from Monocular 3D Localization to Social Distancing (MonoLoco++)**
|
||||
[README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonoLoco%2B%2B.md) & [Article](https://arxiv.org/abs/2009.00984)
|
||||
|
||||

|
||||

|
||||
|
||||
3. **MonoLoco: Monocular 3D Pedestrian Localization and Uncertainty Estimation** (Improved!)
|
||||
[README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonoLoco.md) & [Article](https://arxiv.org/abs/1906.06059) & [Original Repo](https://github.com/vita-epfl/monoloco)
|
||||

|
||||
|
||||
|
||||
2. **MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization**
|
||||
[README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonStereo.md) & [Article](https://arxiv.org/abs/2008.10913)
|
||||
|
||||

|
||||
|
||||

|
||||
Both projects have been built upon the CVPR'19 project [Openpifpaf](https://github.com/vita-epfl/openpifpaf)
|
||||
for 2D pose estimation and the ICCV'19 project [MonoLoco](https://github.com/vita-epfl/monoloco) for monocular 3D localization.
|
||||
All projects share the AGPL Licence.
|
||||
|
||||
|
||||
# Setup
|
||||
Installation steps are the same for both projects.
|
||||
|
||||
### Install
|
||||
The installation has been tested on OSX and Linux operating systems, with Python 3.6 or Python 3.7.
|
||||
Packages have been installed with pip and virtual environments.
|
||||
For quick installation, do not clone this repository,
|
||||
and make sure there is no folder named monstereo in your current directory.
|
||||
A GPU is not required, yet it is highly recommended for real-time performance.
|
||||
MonStereo can be installed as a package by running:
|
||||
|
||||
```
|
||||
pip3 install monstereo
|
||||
```
|
||||
|
||||
For development of the monstereo source code itself, you need to clone this repository and then:
|
||||
```
|
||||
pip3 install sdist
|
||||
cd monstereo
|
||||
python3 setup.py sdist bdist_wheel
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
### Interfaces
|
||||
All the commands are run through a main file called `run.py` using subparsers.
|
||||
To check all the commands for the parser and the subparsers (including openpifpaf ones) run:
|
||||
|
||||
* `python3 -m monstereo.run --help`
|
||||
* `python3 -m monstereo.run predict --help`
|
||||
* `python3 -m monstereo.run train --help`
|
||||
* `python3 -m monstereo.run eval --help`
|
||||
* `python3 -m monstereo.run prep --help`
|
||||
|
||||
or check the file `monstereo/run.py`
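
As an illustration of how such a subparser-based entry point is typically wired, here is a minimal sketch in Python; the option names are placeholders, the real definitions live in `monstereo/run.py`:

```
import argparse

def cli():
    # minimal sketch of a subparser-based entry point
    # (placeholder options, not the actual monstereo/run.py definitions)
    parser = argparse.ArgumentParser(prog='monstereo.run')
    subparsers = parser.add_subparsers(dest='command')

    predict = subparsers.add_parser('predict', help='2D poses and 3D localization on images')
    predict.add_argument('--model', help='path to a trained model')
    predict.add_argument('--output_types', nargs='+', help='e.g. json, multi, front, bird')

    train = subparsers.add_parser('train', help='train from preprocessed joints')
    train.add_argument('--joints', help='path to the preprocessed joints json file')

    return parser.parse_args()

if __name__ == '__main__':
    args = cli()
    print(args.command)
```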
|
||||
|
||||
Further instructions for prediction, preprocessing, training and evaluation can be found here:
|
||||
|
||||
* [MonStereo README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonStereo.md)
|
||||
* [MonoLoco++ README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonoLoco_pp.md)
|
||||
|
||||
BIN
docs/000840_multi.jpg
Normal file
|
After Width: | Height: | Size: 633 KiB |
BIN
docs/002282.png
Executable file
|
After Width: | Height: | Size: 831 KiB |
@ -24,53 +24,15 @@ month = {August},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||
|
||||
# Prediction
|
||||
The predict script receives an image (or an entire folder using glob expressions),
|
||||
calls PifPaf for 2d human pose detection over the image
|
||||
and runs MonStereo for 3d location of the detected poses.
|
||||
|
||||
# Features
|
||||
The code has been built upon the ICCV'19 project [MonoLoco](https://github.com/vita-epfl/monoloco).
|
||||
This repository supports
|
||||
Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
|
||||
*birds-eye-view mode* or *multi mode* and can be specified with `--output_types`
|
||||
|
||||
* the original MonoLoco
|
||||
* An improved Monocular version (MonoLoco++) for x,y,z coordinates, orientation, and dimensions
|
||||
* MonStereo
|
||||
|
||||
# Setup
|
||||
|
||||
### Install
|
||||
The installation has been tested on OSX and Linux operating systems, with Python 3.6 or Python 3.7.
|
||||
Packages have been installed with pip and virtual environments.
|
||||
For quick installation, do not clone this repository,
|
||||
and make sure there is no folder named monstereo in your current directory.
|
||||
A GPU is not required, yet it is highly recommended for real-time performance.
|
||||
MonStereo can be installed as a package by running:
|
||||
|
||||
```
|
||||
pip3 install monstereo
|
||||
```
|
||||
|
||||
For development of the monstereo source code itself, you need to clone this repository and then:
|
||||
```
|
||||
pip3 install sdist
|
||||
cd monstereo
|
||||
python3 setup.py sdist bdist_wheel
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
### Data structure
|
||||
|
||||
Data
|
||||
├── arrays
|
||||
├── models
|
||||
├── kitti
|
||||
├── logs
|
||||
├── output
|
||||
|
||||
|
||||
Run the following to create the folders:
|
||||
```
|
||||
mkdir data
|
||||
cd data
|
||||
mkdir arrays models kitti logs output
|
||||
```
|
||||
|
||||
### Pre-trained Models
|
||||
* Download Monstereo pre-trained model from
|
||||
@ -85,27 +47,6 @@ Alternatively, you can download a Pifpaf pre-trained model from [openpifpaf](htt
|
||||
If you'd like to use an updated version, we suggest re-training the MonStereo model as well.
|
||||
* The model for the experiments is provided in *data/models/ms-200710-1511.pkl*
|
||||
|
||||
# Interfaces
|
||||
All the commands are run through a main file called `run.py` using subparsers.
|
||||
To check all the commands for the parser and the subparsers (including openpifpaf ones) run:
|
||||
|
||||
* `python3 -m monstereo.run --help`
|
||||
* `python3 -m monstereo.run predict --help`
|
||||
* `python3 -m monstereo.run train --help`
|
||||
* `python3 -m monstereo.run eval --help`
|
||||
* `python3 -m monstereo.run prep --help`
|
||||
|
||||
or check the file `monstereo/run.py`
|
||||
|
||||
# Prediction
|
||||
The predict script receives an image (or an entire folder using glob expressions),
|
||||
calls PifPaf for 2d human pose detection over the image
|
||||
and runs MonStereo for 3d location of the detected poses.
|
||||
|
||||
|
||||
Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
|
||||
*birds-eye-view mode* or *multi mode* and can be specified with `--output_types`
|
||||
|
||||
|
||||
### Ground truth matching
|
||||
* In case you provide a ground-truth json file to compare the predictions of MonStereo,
|
||||
@ -125,13 +66,13 @@ After downloading model and ground-truth file, a demo can be tested with the fol
|
||||
--model data/models/ms-200710-1511.pkl --z_max 30 --checkpoint resnet152 --path_gt data/arrays/names-kitti-200615-1022.json
|
||||
-o data/output`
|
||||
|
||||

|
||||

|
||||
|
||||
`python3 -m monstereo.run predict --glob docs/005523*.png --output_types multi --scale 2
|
||||
--model data/models/ms-200710-1511.pkl --z_max 30 --checkpoint resnet152 --path_gt data/arrays/names-kitti-200615-1022.json
|
||||
-o data/output`
|
||||
|
||||

|
||||

|
||||
|
||||
# Preprocessing
|
||||
Preprocessing and training steps are already fully supported by the code provided,
|
||||
@ -139,6 +80,22 @@ but require first to run a pose detector over
|
||||
all the training images and collect the annotations.
|
||||
The code supports this option (by running the predict script and using `--mode pifpaf`).
|
||||
|
||||
### Data structure
|
||||
|
||||
Data
|
||||
├── arrays
|
||||
├── models
|
||||
├── kitti
|
||||
├── logs
|
||||
├── output
|
||||
|
||||
Run the following to create the folders:
|
||||
```
|
||||
mkdir data
|
||||
cd data
|
||||
mkdir arrays models kitti logs output
|
||||
```
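
Equivalently, a short Python sketch (assuming it is run from the repository root) creates the same layout:

```
import os

# create data/arrays, data/models, data/kitti, data/logs and data/output
for folder in ('arrays', 'models', 'kitti', 'logs', 'output'):
    os.makedirs(os.path.join('data', folder), exist_ok=True)
```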
|
||||
|
||||
|
||||
### Datasets
|
||||
Download KITTI ground truth files and camera calibration matrices for training
|
||||
@ -149,13 +106,20 @@ data/kitti/images`
|
||||
|
||||
|
||||
### Annotations to preprocess
|
||||
MonStereo is trained using 2D human pose joints. To create them, run pifpaf over the KITTI training images.
|
||||
You can create them running the predict script and using `--mode pifpaf`.
|
||||
MonStereo is trained using 2D human pose joints. To obtain the joints, the first step is to run
|
||||
pifpaf over the KITTI training images, either by running the predict script with `--mode pifpaf`,
|
||||
or by using the pifpaf code directly.
|
||||
The MonStereo preprocessing script expects annotations from left and right images in two different folders
|
||||
with the same path, apart from the suffix `_right` for the "right" folder.
|
||||
For example `data/annotations` and `data/annotations_right`.
|
||||
Do not change the names of the json files created by pifpaf. For each left annotation,
|
||||
the code will look for the corresponding right annotation.
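
As an illustration, here is a hedged sketch of how the corresponding right annotation can be located, assuming the folder convention described above (the helper name and the annotation suffix are examples only):

```
import os

def right_annotation_path(left_path):
    # hypothetical helper:
    # data/annotations/000001.png.predictions.json -> data/annotations_right/000001.png.predictions.json
    left_dir, name = os.path.split(left_path)
    return os.path.join(left_dir + '_right', name)
```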
|
||||
|
||||
### Inputs joints for training
|
||||
MonStereo is trained using 2D human pose joints matched with the ground-truth locations provided by
|
||||
the KITTI dataset. To create the joints, run `python3 -m monstereo.run prep`, specifying:
|
||||
1. `--dir_ann` annotation directory containing Pifpaf joints of KITTI.
|
||||
|
||||
`--dir_ann` annotation directory containing Pifpaf joints of KITTI for the left images.
|
||||
|
||||
|
||||
### Ground truth file for evaluation
|
||||
@ -165,7 +129,7 @@ by the image name to easily access ground truth files for evaluation and predict
|
||||
|
||||
# Training
|
||||
Provide the json file containing the preprocess joints as argument.
|
||||
As simple as `python3 -m monstereo.run train --joints <json file path>`
|
||||
As simple as `python3 -m monstereo.run train --joints <json file path> `
|
||||
All the hyperparameters options can be checked at `python3 -m monstereo.run train --help`.
|
||||
|
||||
# Evaluation (KITTI Dataset)
|
||||
|
||||
216
docs/MonoLoco++.md
Normal file
@ -0,0 +1,216 @@
|
||||
|
||||
# Perceiving Humans: from Monocular 3D Localization to Social Distancing
|
||||
|
||||
> Perceiving humans in the context of Intelligent Transportation Systems (ITS)
|
||||
often relies on multiple cameras or expensive LiDAR sensors.
|
||||
In this work, we present a new cost-effective vision-based method that perceives humans’ locations in 3D
|
||||
and their body orientation from a single image.
|
||||
We address the challenges related to the ill-posed monocular 3D tasks by proposing a deep learning method
|
||||
that predicts confidence intervals in contrast to point estimates. Our neural network architecture estimates
|
||||
humans 3D body locations and their orientation with a measure of uncertainty.
|
||||
Our vision-based system (i) is privacy-safe, (ii) works with any fixed or moving cameras,
|
||||
and (iii) does not rely on ground plane estimation.
|
||||
We demonstrate the performance of our method with respect to three applications:
|
||||
locating humans in 3D, detecting social interactions,
|
||||
and verifying the compliance of recent safety measures due to the COVID-19 outbreak.
|
||||
Indeed, we show that we can rethink the concept of “social distancing” as a form of social interaction
|
||||
in contrast to a simple location-based rule. We publicly share the source code towards an open science mission.
|
||||
|
||||
```
|
||||
@InProceedings{bertoni_social,
|
||||
author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
|
||||
title={Perceiving Humans: from Monocular 3D Localization to Social Distancing},
|
||||
booktitle = {arXiv:2009.00984},
|
||||
month = {September},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||

|
||||
|
||||
## Predictions
|
||||
For a quick setup, download a pifpaf and a MonoLoco++ model from
|
||||
[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
|
||||
and save them into `data/models`.
|
||||
|
||||
### 3D Localization
|
||||
The predict script receives an image (or an entire folder using glob expressions),
|
||||
calls PifPaf for 2D human pose detection over the image
|
||||
and runs MonoLoco++ for 3D localization of the detected poses.
|
||||
The argument `--net` defines whether to save pifpaf outputs, MonoLoco++ outputs or MonStereo ones.
|
||||
You can check all commands for Pifpaf at [openpifpaf](https://github.com/vita-epfl/openpifpaf).
|
||||
|
||||
Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
|
||||
*birds-eye-view mode* or *combined mode* and can be specified with `--output_types`
|
||||
|
||||
Ground-truth KITTI files for comparing results can be downloaded from
|
||||
[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
|
||||
(file called *names-kitti*) and should be saved into `data/arrays`.
|
||||
Ground-truth files can also be generated; more information can be found in the preprocessing section.
|
||||
|
||||
For an example image, run the following command:
|
||||
|
||||
```
|
||||
python -m monstereo.run predict \
|
||||
docs/002282.png \
|
||||
--net monoloco_pp \
|
||||
--output_types multi \
|
||||
--model data/models/monoloco_pp-201203-1424.pkl \
|
||||
--path_gt data/arrays/names-kitti-200615-1022.json \
|
||||
-o <output directory> \
|
||||
--long-edge <rescale the image by providing dimension of long side. If None original resolution>
|
||||
--n_dropout <50 to include epistemic uncertainty, 0 otherwise>
|
||||
```
|
||||
|
||||

|
||||
|
||||
To show all the instances estimated by MonoLoco, add the argument `--show_all` to the above command.
|
||||
|
||||

|
||||
|
||||
### Social Distancing
|
||||
To visualize social distancing compliance, simply add the argument `--social-distance` to the predict command.
|
||||
|
||||
An example from the Collective Activity Dataset is provided below.
|
||||
|
||||
<img src="frame0038.jpg" width="500"/>
|
||||
|
||||
To visualize social distancing, run the command below:
|
||||
```
|
||||
python -m monstereo.run predict \
|
||||
docs/frame0038.jpg \
|
||||
--net monoloco_pp \
|
||||
--social_distance \
|
||||
--output_types front bird --show_all \
|
||||
--model data/models/monoloco_pp-201203-1424.pkl -o <output directory>
|
||||
```
|
||||
<img src="out_frame0038.jpg.front.png" width="400"/>
|
||||
|
||||
|
||||
<img src="out_frame0038.jpg.bird.png" width="400"/>
|
||||
|
||||
Threshold distance and radii (for F-formations) can be set using `--threshold-dist` and `--radii`, respectively.
|
||||
|
||||
For more info, run:
|
||||
|
||||
`python -m monstereo.run predict --help`
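
For intuition, the rule behind these arguments can be summarized with the simplified sketch below; it is a rough illustration of the distance and F-formation checks described in the paper, not the exact implementation:

```
import math

def _dist(p, q):
    return math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)

def looks_like_interaction(center_a, center_b, angle_a, angle_b,
                           threshold_dist=2.0, radius=0.4):
    # 1) proximity: the two people must be within the distance threshold
    if _dist(center_a, center_b) > threshold_dist:
        return False
    # 2) project each person forward along the gaze direction by the o-space radius
    mu_a = (center_a[0] + radius * math.cos(angle_a), center_a[1] - radius * math.sin(angle_a))
    mu_b = (center_b[0] + radius * math.cos(angle_b), center_b[1] - radius * math.sin(angle_b))
    o_center = ((mu_a[0] + mu_b[0]) / 2, (mu_a[1] + mu_b[1]) / 2)
    # 3) looking inwards: the projected points should be closer together
    #    than each person is to the shared o-space center
    return _dist(mu_a, mu_b) <= min(_dist(center_a, o_center), _dist(center_b, o_center))
```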
|
||||
|
||||
### Orientation and Bounding Box dimensions
|
||||
MonoLoco++ estimates orientation and box dimensions as well. Results are saved in a json file when using the command
|
||||
`--output_types json`. At the moment, the only visualization including orientation is the social distancing one.
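
For instance, a quick way to inspect such an output file (the file name below is only an example and depends on the input image and options):

```
import json

# example output file name; the actual name depends on the predict arguments
with open('data/output/002282.png.monoloco.json') as f:
    predictions = json.load(f)

# print the top-level structure without assuming specific key names
if isinstance(predictions, dict):
    print(sorted(predictions.keys()))
else:
    print(len(predictions), 'entries')
```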
|
||||
|
||||
## Preprocessing
|
||||
|
||||
### Kitti
|
||||
Annotations from a pose detector need to be stored in a folder.
|
||||
For example, by using [openpifpaf](https://github.com/vita-epfl/openpifpaf):
|
||||
```
|
||||
python -m openpifpaf.predict \
|
||||
--glob "<kitti images directory>/*.png" \
|
||||
--json-output <directory to contain predictions> \
|
||||
--checkpoint=shufflenetv2k30 \
|
||||
--instance-threshold=0.05 --seed-threshold 0.05 --force-complete-pose
|
||||
```
|
||||
Once the step is complete:
|
||||
`python -m monstereo.run prep --dir_ann <directory that contains predictions> --monocular`
|
||||
|
||||
|
||||
### Collective Activity Dataset
|
||||
To evaluate on the [collective activity dataset](http://vhosts.eecs.umich.edu/vision//activity-dataset.html)
|
||||
(without any training), we selected 6 scenes that contain people talking to each other.
|
||||
This allows for a balanced dataset, but any other configuration will work.
|
||||
|
||||
The expected structure for the dataset is the following:
|
||||
|
||||
collective_activity
|
||||
├── images
|
||||
├── annotations
|
||||
|
||||
where images and annotations inside have the following name convention:
|
||||
|
||||
IMAGES: seq<sequence_name>_frame<frame_name>.jpg
|
||||
ANNOTATIONS: seq<sequence_name>_annotations.txt
|
||||
|
||||
With respect to the original dataset, the images and annotations are moved to a single folder
|
||||
and the sequence is added to their names. One command to do this is:
|
||||
|
||||
`rename -v -n 's/frame/seq14_frame/' f*.jpg`
|
||||
|
||||
which, for example, renames all the jpg images in that folder by adding the sequence number
|
||||
(remove `-n` after checking that it works).
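
An equivalent sketch in Python, assuming the images of one sequence sit in a single folder and only the sequence prefix needs to be added (the sequence id is an example):

```
import glob
import os

SEQ = 'seq14'  # example sequence identifier
for path in glob.glob('data/collective_activity/images/frame*.jpg'):
    folder, name = os.path.split(path)
    os.rename(path, os.path.join(folder, SEQ + '_' + name))
```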
|
||||
|
||||
Pifpaf annotations should also be saved in a single folder and can be created with:
|
||||
|
||||
```
|
||||
python -m openpifpaf.predict \
|
||||
--glob "data/collective_activity/images/*.jpg" \
|
||||
--checkpoint=shufflenetv2k30 \
|
||||
--instance-threshold=0.05 --seed-threshold 0.05 --force-complete-pose \
|
||||
--json-output /data/lorenzo-data/annotations/collective_activity/v012
|
||||
```
|
||||
|
||||
Finally, to evaluate activity using a MonoLoco++ pre-trained model trained either on nuScenes or KITTI:
|
||||
```
|
||||
python -m monstereo.run eval --activity \
|
||||
--net monoloco_pp --dataset collective \
|
||||
--model <MonoLoco++ model path> --dir_ann <pifpaf annotations directory>
|
||||
```
|
||||
|
||||
## Training
|
||||
We train on the KITTI or nuScenes dataset, specifying the path of the input joints.
|
||||
|
||||
Our results are obtained with:
|
||||
|
||||
`python -m monstereo.run train --lr 0.001 --joints data/arrays/joints-kitti-201202-1743.json --save --monocular`
|
||||
|
||||
For a more extensive list of available parameters, run:
|
||||
|
||||
`python -m monstereo.run train --help`
|
||||
|
||||
## Evaluation
|
||||
|
||||
### 3D Localization
|
||||
We provide evaluation on KITTI for models trained on nuScenes or KITTI. We compare them with other monocular
|
||||
and stereo Baselines:
|
||||
|
||||
[MonoLoco](https://github.com/vita-epfl/monoloco),
|
||||
[Mono3D](https://www.cs.toronto.edu/~urtasun/publications/chen_etal_cvpr16.pdf),
|
||||
[3DOP](https://xiaozhichen.github.io/papers/nips15chen.pdf),
|
||||
[MonoDepth](https://arxiv.org/abs/1609.03677)
|
||||
[MonoPSR](https://github.com/kujason/monopsr) and our
|
||||
[MonoDIS](https://research.mapillary.com/img/publications/MonoDIS.pdf) and our
|
||||
[Geometrical Baseline](monoloco/eval/geom_baseline.py).
|
||||
|
||||
* **Mono3D**: download validation files from [here](http://3dimage.ee.tsinghua.edu.cn/cxz/mono3d)
|
||||
and save them into `data/kitti/m3d`
|
||||
* **3DOP**: download validation files from [here](https://xiaozhichen.github.io/)
|
||||
and save them into `data/kitti/3dop`
|
||||
* **MonoDepth**: compute an average depth for every instance using the following script
|
||||
[here](https://github.com/Parrotlife/pedestrianDepth-baseline/tree/master/MonoDepth-PyTorch)
|
||||
and save them into `data/kitti/monodepth`
|
||||
* **GeometricalBaseline**: A geometrical baseline comparison is provided.
|
||||
|
||||
Download the model for MonoLoco.
|
||||
|
||||
The average geometrical value for comparison can be obtained by running:
|
||||
```
|
||||
python -m monstereo.run eval
|
||||
--dir_ann <annotation directory>
|
||||
--model <model path>
|
||||
--net monoloco_pp
|
||||
--generate
|
||||
```
|
||||
|
||||
To also include the geometric baselines and MonoLoco, add the flag `--baselines`.
|
||||
|
||||
### Activity Estimation (Talking)
|
||||
Please follow the preprocessing steps for the Collective Activity dataset and run pifpaf over the dataset images.
|
||||
Evaluation on this dataset is done with models trained on either KITTI or nuScenes.
|
||||
For optimal performance, we suggest the model trained on the nuScenes teaser (TODO add link).
|
||||
```
|
||||
python -m monstereo.run eval
|
||||
--activity
|
||||
--dataset collective
|
||||
--net monoloco_pp
|
||||
--model <path to the model>
|
||||
--dir_ann <annotation directory>
|
||||
```
|
||||
@ -1,14 +0,0 @@
|
||||
|
||||
### Work in Progress
|
||||
|
||||
For the moment please refer to the [original repository](https://github.com/vita-epfl/monoloco)
|
||||
|
||||
```
|
||||
@InProceedings{bertoni_perceiving,
|
||||
author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
|
||||
title = {Perceiving Humans: from Monocular 3D Localization to Social Distancing},
|
||||
booktitle = {arXiv:2009.00984},
|
||||
month = {September},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||
BIN
docs/frame0038.jpg
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/out_000840.jpg
Normal file
|
After Width: | Height: | Size: 197 KiB |
|
Before Width: | Height: | Size: 584 KiB |
BIN
docs/out_002282.png.multi.jpg
Normal file
|
After Width: | Height: | Size: 398 KiB |
BIN
docs/out_002282.png.multi_all.jpg
Normal file
|
After Width: | Height: | Size: 344 KiB |
BIN
docs/out_005523.jpg
Normal file
|
After Width: | Height: | Size: 255 KiB |
|
Before Width: | Height: | Size: 761 KiB |
BIN
docs/out_frame0038.jpg.bird.png
Normal file
|
After Width: | Height: | Size: 27 KiB |
BIN
docs/out_frame0038.jpg.front.png
Normal file
|
After Width: | Height: | Size: 184 KiB |
BIN
docs/pull_sd.png
|
Before Width: | Height: | Size: 2.2 MiB |
BIN
docs/social_distancing.jpg
Normal file
|
After Width: | Height: | Size: 289 KiB |
BIN
docs/truck.jpg
Normal file
|
After Width: | Height: | Size: 149 KiB |
BIN
docs/truck.png
|
Before Width: | Height: | Size: 460 KiB |
@ -2,24 +2,16 @@
|
||||
# pylint: disable=too-many-statements
|
||||
|
||||
import math
|
||||
import glob
|
||||
import os
|
||||
import copy
|
||||
from contextlib import contextmanager
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.patches import Circle, FancyArrow
|
||||
from PIL import Image
|
||||
|
||||
from .network.process import laplace_sampling
|
||||
from .utils import open_annotations
|
||||
from .visuals.pifpaf_show import KeypointPainter, image_canvas
|
||||
from .network import Loco
|
||||
from .network.process import factory_for_gt, preprocess_pifpaf
|
||||
|
||||
|
||||
def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,
|
||||
@ -27,17 +19,20 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa
|
||||
"""
|
||||
return flag of alert if social distancing is violated
|
||||
"""
|
||||
|
||||
# A) Check whether people are close together
|
||||
xx = centers[idx][0]
|
||||
zz = centers[idx][1]
|
||||
distances = [math.sqrt((xx - centers[i][0]) ** 2 + (zz - centers[i][1]) ** 2) for i, _ in enumerate(centers)]
|
||||
sorted_idxs = np.argsort(distances)
|
||||
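# keep only the neighbours within the distance threshold (sorted_idxs[0] is the person itself)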
indices = [idx_t for idx_t in sorted_idxs[1:] if distances[idx_t] <= threshold_dist]
|
||||
|
||||
# B) Check whether people are looking inwards and whether there are no intrusions
|
||||
# Deterministic
|
||||
if n_samples < 2:
|
||||
for idx_t in indices:
|
||||
if check_f_formations(idx, idx_t, centers, angles,
|
||||
radii=radii, # Binary value
|
||||
radii=radii, # Binary value
|
||||
social_distance=social_distance):
|
||||
return True
|
||||
|
||||
@ -72,8 +67,8 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa
|
||||
|
||||
def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False):
|
||||
"""
|
||||
Check F-formations for people close together:
|
||||
1) Empty space of 0.4 + meters (no other people or themselves inside)
|
||||
Check F-formations for people close together (this function does not expect far-away people):
|
||||
1) Empty space of a certain radius (no other people or themselves inside)
|
||||
2) People looking inward
|
||||
"""
|
||||
|
||||
@ -91,119 +86,25 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False
|
||||
mu_1 = np.array([centers[idx_t][0] + radius * math.cos(theta1), centers[idx_t][1] - radius * math.sin(theta1)])
|
||||
o_c = (mu_0 + mu_1) / 2
|
||||
|
||||
# Verify they are looking inwards.
|
||||
# 1) Verify they are looking inwards.
|
||||
# The distance between the two mus should be smaller than the distances between the original positions and the center
|
||||
d_new = np.linalg.norm(mu_0 - mu_1) / 2 if social_distance else np.linalg.norm(mu_0 - mu_1)
|
||||
d_0 = np.linalg.norm(x_0 - o_c)
|
||||
d_1 = np.linalg.norm(x_1 - o_c)
|
||||
|
||||
# Verify no intrusion for third parties
|
||||
# 2) Verify no intrusion for third parties
|
||||
if other_centers.size:
|
||||
other_distances = np.linalg.norm(other_centers - o_c.reshape(1, -1), axis=1)
|
||||
else:
|
||||
other_distances = 100 * np.ones((1, 1)) # Condition verified if no other people
|
||||
|
||||
# Binary Classification
|
||||
# if np.min(other_distances) > radius: # Ablation without orientation
|
||||
if d_new <= min(d_0, d_1) and np.min(other_distances) > radius:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def predict(args):
|
||||
|
||||
cnt = 0
|
||||
args.device = torch.device('cpu')
|
||||
if torch.cuda.is_available():
|
||||
args.device = torch.device('cuda')
|
||||
|
||||
# Load data and model
|
||||
monoloco = Loco(model=args.model, net='monoloco_pp',
|
||||
device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||
|
||||
images = []
|
||||
images += glob.glob(args.glob) # from cli as a string or linux converts
|
||||
|
||||
# Option 1: Run PifPaf extract poses and run MonoLoco in a single forward pass
|
||||
if args.json_dir is None:
|
||||
from .network import PifPaf, ImageList
|
||||
pifpaf = PifPaf(args)
|
||||
data = ImageList(args.images, scale=args.scale)
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
data, batch_size=1, shuffle=False,
|
||||
pin_memory=args.pin_memory, num_workers=args.loader_workers)
|
||||
|
||||
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
|
||||
images = image_tensors.permute(0, 2, 3, 1)
|
||||
|
||||
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
|
||||
fields_batch = pifpaf.fields(processed_images)
|
||||
|
||||
# unbatch
|
||||
for image_path, image, processed_image_cpu, fields in zip(
|
||||
image_paths, images, processed_images_cpu, fields_batch):
|
||||
|
||||
if args.output_directory is None:
|
||||
output_path = image_path
|
||||
else:
|
||||
file_name = os.path.basename(image_path)
|
||||
output_path = os.path.join(args.output_directory, file_name)
|
||||
im_size = (float(image.size()[1] / args.scale),
|
||||
float(image.size()[0] / args.scale))
|
||||
|
||||
print('image', idx, image_path, output_path)
|
||||
|
||||
_, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
|
||||
|
||||
kk, dic_gt = factory_for_gt(im_size, name=image_path, path_gt=args.path_gt)
|
||||
image_t = image # Resized tensor
|
||||
|
||||
# Run Monoloco
|
||||
boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size, enlarge_boxes=False)
|
||||
dic_out = monoloco.forward(keypoints, kk)
|
||||
dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
|
||||
|
||||
# Print
|
||||
show_social(args, image_t, output_path, pifpaf_out, dic_out)
|
||||
|
||||
print('Image {}\n'.format(cnt) + '-' * 120)
|
||||
cnt += 1
|
||||
|
||||
# Option 2: Load json file of poses from PifPaf and run monoloco
|
||||
else:
|
||||
for idx, im_path in enumerate(images):
|
||||
|
||||
# Load image
|
||||
with open(im_path, 'rb') as f:
|
||||
image = Image.open(f).convert('RGB')
|
||||
if args.output_directory is None:
|
||||
output_path = im_path
|
||||
else:
|
||||
file_name = os.path.basename(im_path)
|
||||
output_path = os.path.join(args.output_directory, file_name)
|
||||
|
||||
im_size = (float(image.size[0] / args.scale),
|
||||
float(image.size[1] / args.scale)) # Width, Height (original)
|
||||
kk, dic_gt = factory_for_gt(im_size, name=im_path, path_gt=args.path_gt)
|
||||
image_t = torchvision.transforms.functional.to_tensor(image).permute(1, 2, 0)
|
||||
|
||||
# Load json
|
||||
basename, ext = os.path.splitext(os.path.basename(im_path))
|
||||
|
||||
extension = ext + '.pifpaf.json'
|
||||
path_json = os.path.join(args.json_dir, basename + extension)
|
||||
annotations = open_annotations(path_json)
|
||||
|
||||
# Run Monoloco
|
||||
boxes, keypoints = preprocess_pifpaf(annotations, im_size, enlarge_boxes=False)
|
||||
dic_out = monoloco.forward(keypoints, kk)
|
||||
dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
|
||||
# Print
|
||||
show_social(args, image_t, output_path, annotations, dic_out)
|
||||
|
||||
print('Image {}\n'.format(cnt) + '-' * 120)
|
||||
cnt += 1
|
||||
|
||||
|
||||
def show_social(args, image_t, output_path, annotations, dic_out):
|
||||
"""Output frontal image with poses or combined with bird eye view"""
|
||||
|
||||
@ -214,24 +115,17 @@ def show_social(args, image_t, output_path, annotations, dic_out):
|
||||
stds = dic_out['stds_ale']
|
||||
xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']]
|
||||
|
||||
# Prepare color for social distancing
|
||||
colors = ['r' if social_interactions(idx, xz_centers, angles, dds,
|
||||
stds=stds,
|
||||
threshold_prob=args.threshold_prob,
|
||||
threshold_dist=args.threshold_dist,
|
||||
radii=args.radii)
|
||||
else 'deepskyblue'
|
||||
for idx, _ in enumerate(dic_out['xyz_pred'])]
|
||||
|
||||
# Draw keypoints and orientation
|
||||
if 'front' in args.output_types:
|
||||
|
||||
# Resize back the tensor image to its original dimensions
|
||||
if not 0.99 < args.scale < 1.01:
|
||||
size = (round(image_t.shape[0] / args.scale), round(image_t.shape[1] / args.scale)) # height width
|
||||
image_t = image_t.permute(2, 0, 1).unsqueeze(0) # batch x channels x height x width
|
||||
image_t = F.interpolate(image_t, size=size).squeeze().permute(1, 2, 0)
|
||||
|
||||
# Prepare color for social distancing
|
||||
colors = ['r' if social_interactions(idx, xz_centers, angles, dds,
|
||||
stds=stds,
|
||||
threshold_prob=args.threshold_prob,
|
||||
threshold_dist=args.threshold_dist,
|
||||
radii=args.radii)
|
||||
else 'deepskyblue'
|
||||
for idx, _ in enumerate(dic_out['xyz_pred'])]
|
||||
|
||||
# Draw keypoints and orientation
|
||||
keypoint_sets, scores = get_pifpaf_outputs(annotations)
|
||||
uv_centers = dic_out['uv_heads']
|
||||
sizes = [abs(dic_out['uv_heads'][idx][1] - uv_s[1]) / 1.5 for idx, uv_s in
|
||||
@ -247,20 +141,20 @@ def show_social(args, image_t, output_path, annotations, dic_out):
|
||||
draw_orientation(ax, uv_centers, sizes, angles, colors, mode='front')
|
||||
|
||||
if 'bird' in args.output_types:
|
||||
with bird_canvas(args, output_path) as ax1:
|
||||
z_max = min(args.z_max, 4 + max([el[1] for el in xz_centers]))
|
||||
with bird_canvas(output_path, z_max) as ax1:
|
||||
draw_orientation(ax1, xz_centers, [], angles, colors, mode='bird')
|
||||
draw_uncertainty(ax1, xz_centers, stds)
|
||||
|
||||
|
||||
def get_pifpaf_outputs(annotations):
|
||||
# TODO extract direct from predictions with pifpaf 0.11+
|
||||
"""Extract keypoints sets and scores from output dictionary"""
|
||||
if not annotations:
|
||||
return [], []
|
||||
keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape(-1, 17, 3)
|
||||
keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape((-1, 17, 3))
|
||||
score_weights = np.ones((keypoints_sets.shape[0], 17))
|
||||
score_weights[:, 3] = 3.0
|
||||
# score_weights[:, 5:] = 0.1
|
||||
# score_weights[:, -2:] = 0.0 # ears are not annotated
|
||||
score_weights /= np.sum(score_weights[0, :])
|
||||
kps_scores = keypoints_sets[:, :, 2]
|
||||
ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1]
|
||||
@ -269,14 +163,14 @@ def get_pifpaf_outputs(annotations):
|
||||
|
||||
|
||||
@contextmanager
|
||||
def bird_canvas(args, output_path):
|
||||
def bird_canvas(output_path, z_max):
|
||||
fig, ax = plt.subplots(1, 1)
|
||||
fig.set_tight_layout(True)
|
||||
output_path = output_path + '.bird.png'
|
||||
x_max = args.z_max / 1.5
|
||||
ax.plot([0, x_max], [0, args.z_max], 'k--')
|
||||
ax.plot([0, -x_max], [0, args.z_max], 'k--')
|
||||
ax.set_ylim(0, args.z_max + 1)
|
||||
x_max = z_max / 1.5
|
||||
ax.plot([0, x_max], [0, z_max], 'k--')
|
||||
ax.plot([0, -x_max], [0, z_max], 'k--')
|
||||
ax.set_ylim(0, z_max + 1)
|
||||
yield ax
|
||||
fig.savefig(output_path)
|
||||
plt.close(fig)
|
||||
|
||||
@ -23,24 +23,28 @@ class ActivityEvaluator:
|
||||
|
||||
def __init__(self, args):
|
||||
|
||||
self.dir_ann = args.dir_ann
|
||||
assert self.dir_ann is not None and os.path.exists(self.dir_ann), \
|
||||
"Annotation directory not provided / does not exist"
|
||||
assert os.listdir(self.dir_ann), "Annotation directory is empty"
|
||||
|
||||
# COLLECTIVE ACTIVITY DATASET (talking)
|
||||
# -------------------------------------------------------------------------------------------------------------
|
||||
if args.dataset == 'collective':
|
||||
self.folders_collective = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
|
||||
self.sequences = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
|
||||
# folders_collective = ['seq02']
|
||||
self.path_collective = ['data/activity/' + fold for fold in self.folders_collective]
|
||||
self.dir_data = 'data/activity/dataset'
|
||||
self.THRESHOLD_PROB = 0.25 # Concordance for samples
|
||||
self.THRESHOLD_DIST = 2 # Threshold to check distance of people
|
||||
self.RADII = (0.3, 0.5) # expected radii of the o-space
|
||||
self.PIFPAF_CONF = 0.4
|
||||
self.PIFPAF_CONF = 0.3
|
||||
self.SOCIAL_DISTANCE = False
|
||||
# -------------------------------------------------------------------------------------------------------------
|
||||
|
||||
# KITTI DATASET (social distancing)
|
||||
# ------------------------------------------------------------------------------------------------------------
|
||||
else:
|
||||
self.dir_ann_kitti = '/data/lorenzo-data/annotations/kitti/scale_2_july'
|
||||
self.dir_gt_kitti = 'data/kitti/gt_activity'
|
||||
self.dir_data = 'data/kitti/gt_activity'
|
||||
self.dir_kk = os.path.join('data', 'kitti', 'calib')
|
||||
self.THRESHOLD_PROB = 0.25 # Concordance for samples
|
||||
self.THRESHOLD_DIST = 2 # Threshold to check distance of people
|
||||
@ -62,25 +66,25 @@ class ActivityEvaluator:
|
||||
def eval_collective(self):
|
||||
"""Parse Collective Activity Dataset and predict if people are talking or not"""
|
||||
|
||||
for fold in self.path_collective:
|
||||
images = glob.glob(fold + '/*.jpg')
|
||||
initial_path = os.path.join(fold, 'frame0001.jpg')
|
||||
with open(initial_path, 'rb') as f:
|
||||
for seq in self.sequences:
|
||||
images = glob.glob(os.path.join(self.dir_data, 'images', seq + '*.jpg'))
|
||||
initial_im = os.path.join(self.dir_data, 'images', seq + '_frame0001.jpg')
|
||||
with open(initial_im, 'rb') as f:
|
||||
image = Image.open(f).convert('RGB')
|
||||
im_size = image.size
|
||||
assert len(im_size) > 1, "image with frame0001 not available"
|
||||
|
||||
for idx, im_path in enumerate(images):
|
||||
|
||||
# Collect PifPaf files and calibration
|
||||
basename = os.path.basename(im_path)
|
||||
extension = '.pifpaf.json'
|
||||
path_pif = os.path.join(fold, basename + extension)
|
||||
extension = '.predictions.json'
|
||||
path_pif = os.path.join(self.dir_ann, basename + extension)
|
||||
annotations = open_annotations(path_pif)
|
||||
kk, _ = factory_for_gt(im_size, verbose=False)
|
||||
|
||||
# Collect corresponding gt files (ys_gt: 1 or 0)
|
||||
boxes_gt, ys_gt = parse_gt_collective(fold, path_pif)
|
||||
|
||||
boxes_gt, ys_gt = parse_gt_collective(self.dir_data, seq, path_pif)
|
||||
# Run Monoloco
|
||||
dic_out, boxes = self.run_monoloco(annotations, kk, im_size=im_size)
|
||||
|
||||
@ -88,17 +92,19 @@ class ActivityEvaluator:
|
||||
matches = get_iou_matches(boxes, boxes_gt, iou_min=0.3)
|
||||
|
||||
# Estimate activity
|
||||
categories = [os.path.basename(fold)] * len(boxes_gt)
|
||||
categories = [seq] * len(boxes_gt) # for compatibility with KITTI evaluation
|
||||
self.estimate_activity(dic_out, matches, ys_gt, categories=categories)
|
||||
|
||||
# Print Results
|
||||
cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.folders_collective)
|
||||
acc = accuracy_score(self.all_gt[seq], self.all_pred[seq])
|
||||
print(f"Accuracy of category {seq}: {100*acc:.2f}%")
|
||||
cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.sequences)
|
||||
|
||||
def eval_kitti(self):
|
||||
"""Parse KITTI Dataset and predict if people are talking or not"""
|
||||
|
||||
from ..utils import factory_file
|
||||
files = glob.glob(self.dir_gt_kitti + '/*.txt')
|
||||
files = glob.glob(self.dir_data + '/*.txt')
|
||||
# files = [self.dir_gt_kitti + '/001782.txt']
|
||||
assert files, "Empty directory"
|
||||
|
||||
@ -107,10 +113,10 @@ class ActivityEvaluator:
|
||||
# Collect PifPaf files and calibration
|
||||
basename, _ = os.path.splitext(os.path.basename(file))
|
||||
path_calib = os.path.join(self.dir_kk, basename + '.txt')
|
||||
annotations, kk, tt = factory_file(path_calib, self.dir_ann_kitti, basename)
|
||||
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
|
||||
|
||||
# Collect corresponding gt files (ys_gt: 1 or 0)
|
||||
path_gt = os.path.join(self.dir_gt_kitti, basename + '.txt')
|
||||
path_gt = os.path.join(self.dir_data, basename + '.txt')
|
||||
boxes_gt, ys_gt, difficulties = parse_gt_kitti(path_gt)
|
||||
|
||||
# Run Monoloco
|
||||
@ -131,17 +137,16 @@ class ActivityEvaluator:
|
||||
angles = dic_out['angles']
|
||||
dds = dic_out['dds_pred']
|
||||
stds = dic_out['stds_ale']
|
||||
confs = dic_out['confs']
|
||||
xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']]
|
||||
|
||||
# Count gt statistics
|
||||
# Count gt statistics. (One element each gt)
|
||||
for key in categories:
|
||||
self.cnt['gt'][key] += 1
|
||||
self.cnt['gt']['all'] += 1
|
||||
|
||||
for i_m, (idx, idx_gt) in enumerate(matches):
|
||||
|
||||
# Select keys to update resultd for Collective or KITTI
|
||||
# Select keys to update results for Collective or KITTI
|
||||
keys = ('all', categories[idx_gt])
|
||||
|
||||
# Run social interactions rule
|
||||
@ -166,10 +171,12 @@ class ActivityEvaluator:
|
||||
return dic_out, boxes
|
||||
|
||||
|
||||
def parse_gt_collective(fold, path_pif):
|
||||
def parse_gt_collective(dir_data, seq, path_pif):
|
||||
"""Parse both gt and binary label (1/0) for talking or not"""
|
||||
|
||||
with open(os.path.join(fold, "annotations.txt"), "r") as ff:
|
||||
path = os.path.join(dir_data, 'annotations', seq + '_annotations.txt')
|
||||
|
||||
with open(path, "r") as ff:
|
||||
reader = csv.reader(ff, delimiter='\t')
|
||||
dic_frames = defaultdict(lambda: defaultdict(list))
|
||||
for idx, line in enumerate(reader):
|
||||
@ -212,17 +219,21 @@ def cout_results(cnt, all_gt, all_pred, categories=()):
|
||||
# Split by folders for collective activity
|
||||
for key in categories:
|
||||
acc = accuracy_score(all_gt[key], all_pred[key])
|
||||
print("Accuracy of category {}: {:.2f}% , Recall: {:.2f}%, #: {}, Predicted positive: {:.2f}%"
|
||||
print("Accuracy of category {}: {:.2f}% , Recall: {:.2f}%, #: {}, Pred/Real positive: {:.1f}% / {:.1f}%"
|
||||
.format(key,
|
||||
acc * 100,
|
||||
cnt['pred'][key] / cnt['gt'][key]*100,
|
||||
cnt['pred'][key],
|
||||
sum(all_gt[key]) / len(all_gt[key]) * 100))
|
||||
sum(all_pred[key]) / len(all_pred[key]) * 100,
|
||||
sum(all_gt[key]) / len(all_gt[key]) * 100
|
||||
)
|
||||
)
|
||||
|
||||
# Final Accuracy
|
||||
acc = accuracy_score(all_gt['all'], all_pred['all'])
|
||||
recall = cnt['pred']['all'] / cnt['gt']['all'] * 100 # only predictions that match a ground-truth are included
|
||||
print('-' * 80)
|
||||
print("Final Accuracy: {:.2f}%".format(acc * 100))
|
||||
print(f"Final Accuracy: {acc * 100:.2f} Final Recall:{recall:.2f}")
|
||||
print('-' * 80)
|
||||
|
||||
|
||||
@ -244,8 +255,8 @@ def convert_category(cat):
|
||||
def extract_frame_number(path):
|
||||
"""extract frame number from path"""
|
||||
name = os.path.basename(path)
|
||||
if name[5] == '0':
|
||||
frame = name[6:9]
|
||||
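# images follow the naming seq<id>_frame<nnnn>.jpg, so the frame digits start at index 11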
if name[11] == '0':
|
||||
frame = name[12:15]
|
||||
else:
|
||||
frame = name[5:9]
|
||||
frame = name[11:15]
|
||||
return frame
|
||||
|
||||
@ -25,41 +25,58 @@ class EvalKitti:
|
||||
'27', '29', '31', '49')
|
||||
ALP_THRESHOLDS = ('<0.5m', '<1m', '<2m')
|
||||
OUR_METHODS = ['geometric', 'monoloco', 'monoloco_pp', 'pose', 'reid', 'monstereo']
|
||||
METHODS_MONO = ['m3d', 'monopsr']
|
||||
METHODS_MONO = ['m3d', 'monopsr', 'smoke', 'monodis']
|
||||
METHODS_STEREO = ['3dop', 'psf', 'pseudo-lidar', 'e2e', 'oc-stereo']
|
||||
BASELINES = ['task_error', 'pixel_error']
|
||||
HEADERS = ('method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all')
|
||||
CATEGORIES = ('pedestrian',)
|
||||
methods = OUR_METHODS + METHODS_MONO + METHODS_STEREO
|
||||
|
||||
def __init__(self, thresh_iou_monoloco=0.3, thresh_iou_base=0.3, thresh_conf_monoloco=0.2, thresh_conf_base=0.5,
|
||||
verbose=False):
|
||||
# Set directories
|
||||
main_dir = os.path.join('data', 'kitti')
|
||||
dir_gt = os.path.join(main_dir, 'gt')
|
||||
path_train = os.path.join('splits', 'kitti_train.txt')
|
||||
path_val = os.path.join('splits', 'kitti_val.txt')
|
||||
dir_logs = os.path.join('data', 'logs')
|
||||
assert os.path.exists(dir_logs), "No directory to save final statistics"
|
||||
dir_fig = os.path.join('data', 'figures')
|
||||
assert os.path.exists(dir_logs), "No directory to save figures"
|
||||
|
||||
self.main_dir = os.path.join('data', 'kitti')
|
||||
self.dir_gt = os.path.join(self.main_dir, 'gt')
|
||||
self.methods = self.OUR_METHODS + self.METHODS_MONO + self.METHODS_STEREO
|
||||
path_train = os.path.join('splits', 'kitti_train.txt')
|
||||
path_val = os.path.join('splits', 'kitti_val.txt')
|
||||
dir_logs = os.path.join('data', 'logs')
|
||||
assert dir_logs, "No directory to save final statistics"
|
||||
# Set thresholds to obtain comparable recalls
|
||||
thresh_iou_monoloco = 0.3
|
||||
thresh_iou_base = 0.3
|
||||
thresh_conf_monoloco = 0.2
|
||||
thresh_conf_base = 0.5
|
||||
|
||||
def __init__(self, args):
|
||||
|
||||
self.verbose = args.verbose
|
||||
self.net = args.net
|
||||
self.save = args.save
|
||||
self.show = args.show
|
||||
|
||||
now = datetime.datetime.now()
|
||||
now_time = now.strftime("%Y%m%d-%H%M")[2:]
|
||||
self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json')
|
||||
self.verbose = verbose
|
||||
self.path_results = os.path.join(self.dir_logs, 'eval-' + now_time + '.json')
|
||||
|
||||
self.dic_thresh_iou = {method: (thresh_iou_monoloco if method in self.OUR_METHODS
|
||||
else thresh_iou_base)
|
||||
# Set thresholds for comparable recalls
|
||||
self.dic_thresh_iou = {method: (self.thresh_iou_monoloco if method in self.OUR_METHODS
|
||||
else self.thresh_iou_base)
|
||||
for method in self.methods}
|
||||
self.dic_thresh_conf = {method: (thresh_conf_monoloco if method in self.OUR_METHODS
|
||||
else thresh_conf_base)
|
||||
self.dic_thresh_conf = {method: (self.thresh_conf_monoloco if method in self.OUR_METHODS
|
||||
else self.thresh_conf_base)
|
||||
for method in self.methods}
|
||||
self.dic_thresh_conf['monopsr'] += 0.3
|
||||
self.dic_thresh_conf['e2e-pl'] = -100 # They don't have enough detections
|
||||
|
||||
# Set thresholds to obtain comparable recall
|
||||
self.dic_thresh_conf['monopsr'] += 0.4
|
||||
self.dic_thresh_conf['e2e-pl'] = -100
|
||||
self.dic_thresh_conf['oc-stereo'] = -100
|
||||
self.dic_thresh_conf['smoke'] = -100
|
||||
self.dic_thresh_conf['monodis'] = -100
|
||||
|
||||
# Extract validation images for evaluation
|
||||
names_gt = tuple(os.listdir(self.dir_gt))
|
||||
_, self.set_val = split_training(names_gt, path_train, path_val)
|
||||
_, self.set_val = split_training(names_gt, self.path_train, self.path_val)
|
||||
|
||||
# self.set_val = ('002282.txt', )
|
||||
|
||||
@ -68,10 +85,13 @@ class EvalKitti:
|
||||
= None
|
||||
self.cnt = 0
|
||||
|
||||
# Filter methods with empty or non existent directory
|
||||
filter_directories(self.main_dir, self.methods)
|
||||
|
||||
def run(self):
|
||||
"""Evaluate Monoloco performances on ALP and ALE metrics"""
|
||||
for self.category in self.CATEGORIES:
|
||||
|
||||
for self.category in self.CATEGORIES:
|
||||
# Initialize variables
|
||||
self.errors = defaultdict(lambda: defaultdict(list))
|
||||
self.dic_stds = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
|
||||
@ -90,7 +110,7 @@ class EvalKitti:
|
||||
methods_out = defaultdict(tuple) # Save all methods for comparison
|
||||
|
||||
# Count ground_truth:
|
||||
boxes_gt, ys, truncs_gt, occs_gt = out_gt
|
||||
boxes_gt, ys, truncs_gt, occs_gt = out_gt # pylint: disable=unbalanced-tuple-unpacking
|
||||
for idx, box in enumerate(boxes_gt):
|
||||
mode = get_difficulty(box, truncs_gt[idx], occs_gt[idx])
|
||||
self.cnt_gt[mode] += 1
|
||||
@ -100,7 +120,6 @@ class EvalKitti:
|
||||
for method in self.methods:
|
||||
# Extract annotations
|
||||
dir_method = os.path.join(self.main_dir, method)
|
||||
assert os.path.exists(dir_method), "directory of the method %s does not exists" % method
|
||||
path_method = os.path.join(dir_method, name)
|
||||
methods_out[method] = self._parse_txts(path_method, method=method)
|
||||
|
||||
@ -124,12 +143,14 @@ class EvalKitti:
|
||||
print('\n' + self.category.upper() + ':')
|
||||
self.show_statistics()
|
||||
|
||||
def printer(self, show, save):
|
||||
if save or show:
|
||||
show_results(self.dic_stats, self.CLUSTERS, show=show, save=save)
|
||||
show_spread(self.dic_stats, self.CLUSTERS, show=show, save=save)
|
||||
show_box_plot(self.errors, self.CLUSTERS, show=show, save=save)
|
||||
show_task_error(show=show, save=save)
|
||||
def printer(self):
|
||||
if self.save or self.show:
|
||||
show_results(self.dic_stats, self.CLUSTERS, self.net, self.dir_fig, show=self.show, save=self.save)
|
||||
show_spread(self.dic_stats, self.CLUSTERS, self.net, self.dir_fig, show=self.show, save=self.save)
|
||||
if self.net == 'monstereo':
|
||||
show_box_plot(self.errors, self.CLUSTERS, self.dir_fig, show=self.show, save=self.save)
|
||||
else:
|
||||
show_task_error(self.dir_fig, show=self.show, save=self.save)
|
||||
|
||||
def _parse_txts(self, path, method):
|
||||
|
||||
@ -352,7 +373,7 @@ class EvalKitti:
|
||||
self.name = name
|
||||
# Iterate over each line of the gt file and save box location and distances
|
||||
out_gt = parse_ground_truth(path_gt, 'pedestrian')
|
||||
boxes_gt, ys, truncs_gt, occs_gt = out_gt
|
||||
boxes_gt, ys, truncs_gt, occs_gt = out_gt # pylint: disable=unbalanced-tuple-unpacking
|
||||
for label in ys:
|
||||
heights.append(label[4])
|
||||
import numpy as np
|
||||
@ -430,3 +451,14 @@ def extract_indices(idx_to_check, *args):
|
||||
def average(my_list):
|
||||
"""calculate mean of a list"""
|
||||
return sum(my_list) / len(my_list)
|
||||
|
||||
|
||||
def filter_directories(main_dir, methods):
|
||||
for method in list(methods):  # iterate over a copy: removing items while iterating would skip entries
|
||||
dir_method = os.path.join(main_dir, method)
|
||||
if not os.path.exists(dir_method):
|
||||
methods.remove(method)
|
||||
print(f"\nMethod {method}. No directory found. Skipping it..")
|
||||
elif not os.listdir(dir_method):
|
||||
methods.remove(method)
|
||||
print(f"\nMethod {method}. Directory is empty. Skipping it..")
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
|
||||
# pylint: disable=too-many-statements,cyclic-import, too-many-branches
|
||||
# pylint: disable=too-many-statements,too-many-branches,cyclic-import
|
||||
|
||||
"""Joints Analysis: Supplementary material of MonStereo"""
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
|
||||
#pylint: disable=too-many-branches
|
||||
# pylint: disable=too-many-branches
|
||||
|
||||
"""
|
||||
Run MonoLoco/MonStereo and convert annotations into KITTI format
|
||||
@ -22,39 +22,35 @@ from .reid_baseline import get_reid_features, ReID
|
||||
|
||||
class GenerateKitti:
|
||||
|
||||
METHODS = ['monstereo', 'monoloco_pp', 'monoloco', 'geometric']
|
||||
dir_gt = os.path.join('data', 'kitti', 'gt')
|
||||
dir_gt_new = os.path.join('data', 'kitti', 'gt_new')
|
||||
dir_kk = os.path.join('data', 'kitti', 'calib')
|
||||
dir_byc = '/data/lorenzo-data/kitti/object_detection/left'
|
||||
monoloco_checkpoint = 'data/models/monoloco-190717-0952.pkl'
|
||||
baselines = {'mono': [], 'stereo': []}
|
||||
|
||||
def __init__(self, model, dir_ann, p_dropout=0.2, n_dropout=0, hidden_size=1024):
|
||||
def __init__(self, args):
|
||||
|
||||
# Load Network
|
||||
self.net = args.net
|
||||
assert args.net in ('monstereo', 'monoloco_pp'), "net not recognized"
|
||||
|
||||
# Load monoloco
|
||||
use_cuda = torch.cuda.is_available()
|
||||
device = torch.device("cuda" if use_cuda else "cpu")
|
||||
|
||||
if 'monstereo' in self.METHODS:
|
||||
self.monstereo = Loco(model=model, net='monstereo', device=device, n_dropout=n_dropout, p_dropout=p_dropout,
|
||||
linear_size=hidden_size)
|
||||
# model_mono_pp = 'data/models/monoloco-191122-1122.pkl' # KITTI_p
|
||||
# model_mono_pp = 'data/models/monoloco-191018-1459.pkl' # nuScenes_p
|
||||
model_mono_pp = 'data/models/stereoloco-200604-0949.pkl' # KITTI_pp
|
||||
# model_mono_pp = 'data/models/stereoloco-200608-1550.pkl' # nuScenes_pp
|
||||
|
||||
if 'monoloco_pp' in self.METHODS:
|
||||
self.monoloco_pp = Loco(model=model_mono_pp, net='monoloco_pp', device=device, n_dropout=n_dropout,
|
||||
p_dropout=p_dropout)
|
||||
|
||||
if 'monoloco' in self.METHODS:
|
||||
model_mono = 'data/models/monoloco-190717-0952.pkl' # KITTI
|
||||
# model_mono = 'data/models/monoloco-190719-0923.pkl' # NuScenes
|
||||
self.monoloco = Loco(model=model_mono, net='monoloco', device=device, n_dropout=n_dropout,
|
||||
p_dropout=p_dropout, linear_size=256)
|
||||
self.dir_ann = dir_ann
|
||||
self.model = Loco(
|
||||
model=args.model,
|
||||
net=args.net,
|
||||
device=device,
|
||||
n_dropout=args.n_dropout,
|
||||
p_dropout=args.dropout,
|
||||
linear_size=args.hidden_size
|
||||
)
|
||||
|
||||
# Extract list of pifpaf files in validation images
|
||||
self.dir_gt = os.path.join('data', 'kitti', 'gt')
|
||||
self.dir_gt_new = os.path.join('data', 'kitti', 'gt_new')
|
||||
self.set_basename = factory_basename(dir_ann, self.dir_gt)
|
||||
self.dir_kk = os.path.join('data', 'kitti', 'calib')
|
||||
self.dir_byc = '/data/lorenzo-data/kitti/object_detection/left'
|
||||
self.dir_ann = args.dir_ann
|
||||
self.generate_official = args.generate_official
|
||||
assert os.listdir(self.dir_ann), "Annotation directory is empty"
|
||||
self.set_basename = factory_basename(args.dir_ann, self.dir_gt)
|
||||
|
||||
# For quick testing
|
||||
# ------------------------------------------------------------------------------------------------------------
|
||||
@ -62,33 +58,48 @@ class GenerateKitti:
|
||||
# self.set_basename = ('002282',)
|
||||
# ------------------------------------------------------------------------------------------------------------
|
||||
|
||||
# Calculate stereo baselines
|
||||
# self.baselines = ['pose', 'reid']
|
||||
self.baselines = []
|
||||
self.cnt_disparity = defaultdict(int)
|
||||
self.cnt_no_stereo = 0
|
||||
self.dir_images = os.path.join('data', 'kitti', 'images')
|
||||
self.dir_images_r = os.path.join('data', 'kitti', 'images_r')
|
||||
# ReID Baseline
|
||||
if 'reid' in self.baselines:
|
||||
weights_path = 'data/models/reid_model_market.pkl'
|
||||
self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128)
|
||||
# Add monocular and stereo baselines (they require monoloco as backbone)
|
||||
if args.baselines:
|
||||
|
||||
# Load MonoLoco
|
||||
self.baselines['mono'] = ['monoloco', 'geometric']
|
||||
self.monoloco = Loco(
|
||||
model=self.monoloco_checkpoint,
|
||||
net='monoloco',
|
||||
device=device,
|
||||
n_dropout=args.n_dropout,
|
||||
p_dropout=args.dropout,
|
||||
linear_size=256
|
||||
)
|
||||
# Stereo baselines
|
||||
if args.net == 'monstereo':
|
||||
self.baselines['stereo'] = ['pose', 'reid']
|
||||
self.cnt_disparity = defaultdict(int)
|
||||
self.cnt_no_stereo = 0
|
||||
self.dir_images = os.path.join('data', 'kitti', 'images')
|
||||
self.dir_images_r = os.path.join('data', 'kitti', 'images_r')
|
||||
|
||||
# ReID Baseline
|
||||
weights_path = 'data/models/reid_model_market.pkl'
|
||||
self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128)
|
||||
|
||||
def run(self):
|
||||
"""Run Monoloco and save txt files for KITTI evaluation"""
|
||||
|
||||
cnt_ann = cnt_file = cnt_no_file = 0
|
||||
dir_out = {key: os.path.join('data', 'kitti', key) for key in self.METHODS}
|
||||
print("\n")
|
||||
for key in self.METHODS:
|
||||
make_new_directory(dir_out[key])
|
||||
|
||||
for key in self.baselines:
|
||||
dir_out[key] = os.path.join('data', 'kitti', key)
|
||||
make_new_directory(dir_out[key])
|
||||
print("Created empty output directory for {}".format(key))
|
||||
# Prepare empty folder
|
||||
di = os.path.join('data', 'kitti', self.net)
|
||||
make_new_directory(di)
|
||||
dir_out = {self.net: di}
|
||||
|
||||
# Run monoloco over the list of images
|
||||
for mode, names in self.baselines.items():
|
||||
for name in names:
|
||||
di = os.path.join('data', 'kitti', name)
|
||||
make_new_directory(di)
|
||||
dir_out[name] = di
|
||||
|
||||
# Run the model
|
||||
for basename in self.set_basename:
|
||||
path_calib = os.path.join(self.dir_kk, basename + '.txt')
|
||||
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
|
||||
@ -98,58 +109,60 @@ class GenerateKitti:
|
||||
annotations_r, _, _ = factory_file(path_calib, self.dir_ann, basename, mode='right')
|
||||
_, keypoints_r = preprocess_pifpaf(annotations_r, im_size=(1242, 374))
|
||||
|
||||
if self.net == 'monstereo':
|
||||
dic_out = self.model.forward(keypoints, kk, keypoints_r=keypoints_r)
|
||||
elif self.net == 'monoloco_pp':
|
||||
dic_out = self.model.forward(keypoints, kk)
|
||||
|
||||
all_outputs = {self.net: [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
|
||||
dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]}
|
||||
zzs = [float(el[2]) for el in dic_out['xyzd']]
|
||||
|
||||
# Save txt files
|
||||
params = [kk, tt]
|
||||
path_txt = os.path.join(dir_out[self.net], basename + '.txt')
|
||||
save_txts(path_txt, boxes, all_outputs[self.net], params, mode=self.net, cat=cat)
|
||||
cnt_ann += len(boxes)
|
||||
cnt_file += 1
|
||||
all_inputs, all_outputs = {}, {}
|
||||
|
||||
# STEREOLOCO
|
||||
dic_out = self.monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
|
||||
all_outputs['monstereo'] = [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
|
||||
dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]
|
||||
|
||||
# MONOLOCO++
|
||||
if 'monoloco_pp' in self.METHODS:
|
||||
dic_out = self.monoloco_pp.forward(keypoints, kk)
|
||||
all_outputs['monoloco_pp'] = [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
|
||||
dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]
|
||||
zzs = [float(el[2]) for el in dic_out['xyzd']]
# MONOLOCO
|
||||
if 'monoloco' in self.METHODS:
|
||||
# MONO (+ STEREO BASELINES)
|
||||
if self.baselines['mono']:
|
||||
# MONOLOCO
dic_out = self.monoloco.forward(keypoints, kk)
zzs_geom, xy_centers = geometric_coordinates(keypoints, kk, average_y=0.48)
all_outputs['monoloco'] = [dic_out['d'], dic_out['bi'], dic_out['epi']] + [zzs_geom, xy_centers]
all_outputs['geometric'] = all_outputs['monoloco']
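# Illustrative sketch (an assumption, not the repo's geometric_coordinates): the geometric
# baseline recovers depth from similar triangles, combining an average torso height in
# meters (average_y) with the focal length fy taken from the calibration matrix kk, so that
# z is roughly fy * average_y / torso_height_in_pixels.
def geometric_depth_sketch(v_shoulder, v_hip, fy, average_y=0.48):
    """Toy depth estimate for one person from the pixel extent of the torso."""
    h_pixel = abs(v_hip - v_shoulder)            # torso extent in pixels
    return fy * average_y / max(h_pixel, 1e-6)   # guard against degenerate keypoints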
params = [kk, tt]
|
||||
# monocular baselines
|
||||
for key in self.baselines['mono']:
|
||||
path_txt = {key: os.path.join(dir_out[key], basename + '.txt')}
|
||||
save_txts(path_txt[key], boxes, all_outputs[key], params, mode=key, cat=cat)
for key in self.METHODS:
|
||||
path_txt = {key: os.path.join(dir_out[key], basename + '.txt')}
|
||||
save_txts(path_txt[key], boxes, all_outputs[key], params, mode=key, cat=cat)
|
||||
# stereo baselines
|
||||
if self.baselines['stereo']:
|
||||
all_inputs = {}
|
||||
dic_xyz = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib)
|
||||
for key in dic_xyz:
|
||||
all_outputs[key] = all_outputs['monoloco'].copy()
|
||||
all_outputs[key][0] = dic_xyz[key]
|
||||
all_inputs[key] = boxes
# STEREO BASELINES
|
||||
if self.baselines:
|
||||
dic_xyz = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib)
for key in dic_xyz:
|
||||
all_outputs[key] = all_outputs['monoloco'].copy()
|
||||
all_outputs[key][0] = dic_xyz[key]
|
||||
all_inputs[key] = boxes
path_txt[key] = os.path.join(dir_out[key], basename + '.txt')
|
||||
save_txts(path_txt[key], all_inputs[key], all_outputs[key], params, mode='baseline', cat=cat)
|
||||
path_txt[key] = os.path.join(dir_out[key], basename + '.txt')
|
||||
save_txts(path_txt[key], all_inputs[key], all_outputs[key], params,
|
||||
mode='baseline',
|
||||
cat=cat)
print("\nSaved in {} txt {} annotations. Not found {} images".format(cnt_file, cnt_ann, cnt_no_file))
if 'monstereo' in self.METHODS:
|
||||
if self.net == 'monstereo':
|
||||
print("STEREO:")
|
||||
for key in self.baselines:
|
||||
for key in self.baselines['stereo']:
|
||||
print("Annotations corrected using {} baseline: {:.1f}%".format(
|
||||
key, self.cnt_disparity[key] / cnt_ann * 100))
|
||||
print("Maximum possible stereo associations: {:.1f}%".format(self.cnt_disparity['max'] / cnt_ann * 100))
|
||||
print("Not found {}/{} stereo files".format(self.cnt_no_stereo, cnt_file))
create_empty_files(dir_out) # Create empty files for official evaluation
|
||||
if self.generate_official:
|
||||
create_empty_files(dir_out, self.net) # Create empty files for official evaluation
def _run_stereo_baselines(self, basename, boxes, keypoints, zzs, path_calib):
@ -165,14 +178,14 @@ class GenerateKitti:
|
||||
path_image = os.path.join(self.dir_images, basename + '.png')
|
||||
path_image_r = os.path.join(self.dir_images_r, basename + '.png')
|
||||
reid_features = get_reid_features(self.reid_net, boxes, boxes_r, path_image, path_image_r)
|
||||
dic_zzs, cnt = baselines_association(self.baselines, zzs, keypoints, keypoints_r, reid_features)
|
||||
dic_zzs, cnt = baselines_association(self.baselines['stereo'], zzs, keypoints, keypoints_r, reid_features)
for key in cnt:
|
||||
self.cnt_disparity[key] += cnt[key]
else:
|
||||
self.cnt_no_stereo += 1
|
||||
dic_zzs = {key: zzs for key in self.baselines}
|
||||
dic_zzs = {key: zzs for key in self.baselines['stereo']}
# Combine the stereo zz with x, y from 2D detection (no MonoLoco involved)
|
||||
dic_xyz = defaultdict(list)
|
||||
@ -227,8 +240,9 @@ def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco', ca
|
||||
conf_scale = 0.03
|
||||
elif mode == 'monoloco_pp':
|
||||
conf_scale = 0.033
|
||||
# conf_scale = 0.035 # nuScenes for having same recall
|
||||
else:
|
||||
conf_scale = 0.055
|
||||
conf_scale = 0.05
|
||||
conf = conf_scale * (uv_box[-1]) / (bi / math.sqrt(xx ** 2 + yy ** 2 + zz ** 2))
output_list = [alpha] + uv_box[:-1] + hwl + cam_0 + [ry, conf, bi, epi]
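# Worked example with illustrative numbers (not dataset values): the KITTI score rescales the
# 2D detection confidence uv_box[-1] by the relative uncertainty bi / distance. For monoloco_pp,
# a 2D score of 0.9 with spread bi = 1 m at ~20 m gives conf = 0.033 * 0.9 / (1.0 / 20.0) ~= 0.59,
# while the same spread at 5 m drops it to roughly 0.15.
assert abs(0.033 * 0.9 / (1.0 / 20.0) - 0.594) < 1e-6  # sanity check of the arithmetic above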
@ -244,11 +258,10 @@ def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco', ca
|
||||
ff.write("\n")
def create_empty_files(dir_out):
|
||||
def create_empty_files(dir_out, net):
|
||||
"""Create empty txt files to run official kitti metrics on MonStereo and all other methods"""
|
||||
|
||||
methods = ['pseudo-lidar', 'monopsr', '3dop', 'm3d', 'oc-stereo', 'e2e']
|
||||
methods = []
|
||||
methods = ['pseudo-lidar', 'monopsr', '3dop', 'm3d', 'oc-stereo', 'e2e', 'monodis', 'smoke']
|
||||
dirs = [os.path.join('data', 'kitti', method) for method in methods]
|
||||
dirs_orig = [os.path.join('data', 'kitti', method + '-orig') for method in methods]
@ -263,8 +276,7 @@ def create_empty_files(dir_out):
|
||||
# If the file exits, rewrite in new folder, otherwise create empty file
|
||||
read_and_rewrite(path_orig, path)
for method in ('monoloco_pp', 'monstereo'):
|
||||
for i in range(7481):
|
||||
name = "0" * (6 - len(str(i))) + str(i) + '.txt'
|
||||
ff = open(os.path.join(dir_out[method], name), "a+")
|
||||
ff.close()
|
||||
for i in range(7481):
|
||||
name = "0" * (6 - len(str(i))) + str(i) + '.txt'
|
||||
ff = open(os.path.join(dir_out[net], name), "a+")
|
||||
ff.close()
@ -29,7 +29,7 @@ def get_reid_features(reid_net, boxes, boxes_r, path_image, path_image_r):
|
||||
|
||||
class ReID(object):
|
||||
def __init__(self, weights_path, device, num_classes=751, height=256, width=128):
|
||||
super(ReID, self).__init__()
|
||||
super().__init__()
|
||||
torch.manual_seed(1)
|
||||
self.device = device
@ -90,7 +90,7 @@ class ReID(object):
|
||||
|
||||
class ResNet50(nn.Module):
|
||||
def __init__(self, num_classes, loss):
|
||||
super(ResNet50, self).__init__()
|
||||
super().__init__()
|
||||
self.loss = loss
|
||||
resnet50 = torchvision.models.resnet50(pretrained=True)
|
||||
self.base = nn.Sequential(*list(resnet50.children())[:-2])
@ -1,4 +1,3 @@
from .net import Loco
|
||||
from .pifpaf import PifPaf, ImageList
|
||||
from .process import unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux
@ -6,7 +6,7 @@ import torch.nn as nn
|
||||
class MonStereoModel(nn.Module):
|
||||
|
||||
def __init__(self, input_size, output_size=2, linear_size=512, p_dropout=0.2, num_stage=3, device='cuda'):
|
||||
super(MonStereoModel, self).__init__()
|
||||
super().__init__()
self.num_stage = num_stage
|
||||
self.stereo_size = input_size
|
||||
@ -73,7 +73,7 @@ class MonStereoModel(nn.Module):
|
||||
|
||||
class MyLinearSimple(nn.Module):
|
||||
def __init__(self, linear_size, p_dropout=0.5):
|
||||
super(MyLinearSimple, self).__init__()
|
||||
super().__init__()
|
||||
self.l_size = linear_size
self.relu = nn.ReLU(inplace=True)
|
||||
@ -109,7 +109,7 @@ class MonolocoModel(nn.Module):
|
||||
"""
def __init__(self, input_size, output_size=2, linear_size=256, p_dropout=0.2, num_stage=3):
|
||||
super(MonolocoModel, self).__init__()
|
||||
super().__init__()
self.input_size = input_size
|
||||
self.output_size = output_size
|
||||
@ -147,7 +147,7 @@ class MonolocoModel(nn.Module):
|
||||
|
||||
class MyLinear(nn.Module):
|
||||
def __init__(self, linear_size, p_dropout=0.5):
|
||||
super(MyLinear, self).__init__()
|
||||
super().__init__()
|
||||
self.l_size = linear_size
self.relu = nn.ReLU(inplace=True)
@ -56,7 +56,7 @@ class Loco:
|
||||
output_size=output_size)
|
||||
else:
|
||||
self.model = MonStereoModel(p_dropout=p_dropout, input_size=input_size, output_size=output_size,
|
||||
linear_size=linear_size, device=self.device)
|
||||
linear_size=linear_size, device=self.device)
self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
|
||||
else:
|
||||
@ -163,7 +163,7 @@ class Loco:
|
||||
print("found {} matches with ground-truth".format(len(matches)))
# Keep track of instances non-matched
|
||||
idxs_matches = (el[0] for el in matches)
|
||||
idxs_matches = [el[0] for el in matches]
|
||||
not_matches = [idx for idx, _ in enumerate(boxes) if idx not in idxs_matches]
else:
@ -1,102 +0,0 @@
import glob
import numpy as np
|
||||
import torchvision
|
||||
import torch
|
||||
from PIL import Image, ImageFile
|
||||
from openpifpaf.network import nets
|
||||
from openpifpaf import decoder
|
||||
|
||||
from .process import image_transform
class ImageList(torch.utils.data.Dataset):
|
||||
"""It defines transformations to apply to images and outputs of the dataloader"""
|
||||
def __init__(self, image_paths, scale):
|
||||
self.image_paths = image_paths
|
||||
self.image_paths.sort()
|
||||
self.scale = scale
|
||||
|
||||
def __getitem__(self, index):
|
||||
image_path = self.image_paths[index]
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
with open(image_path, 'rb') as f:
|
||||
image = Image.open(f).convert('RGB')
|
||||
|
||||
if self.scale > 1.01 or self.scale < 0.99:
|
||||
image = torchvision.transforms.functional.resize(image,
|
||||
(round(self.scale * image.size[1]),
|
||||
round(self.scale * image.size[0])),
|
||||
interpolation=Image.BICUBIC)
|
||||
# PIL images are not iterables
|
||||
original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1
|
||||
image = image_transform(image)
|
||||
|
||||
return image_path, original_image, image
|
||||
|
||||
def __len__(self):
|
||||
return len(self.image_paths)
def factory_from_args(args):
|
||||
|
||||
# Merge the model_pifpaf argument
|
||||
if not args.checkpoint:
|
||||
args.checkpoint = 'resnet152' # Default model Resnet 152
|
||||
# glob
|
||||
if args.glob:
|
||||
args.images += glob.glob(args.glob)
|
||||
if not args.images:
|
||||
raise Exception("no image files given")
|
||||
|
||||
# add args.device
|
||||
args.device = torch.device('cpu')
|
||||
args.pin_memory = False
|
||||
if torch.cuda.is_available():
|
||||
args.device = torch.device('cuda')
|
||||
args.pin_memory = True
|
||||
|
||||
# Add num_workers
|
||||
args.loader_workers = 8
|
||||
|
||||
# Add visualization defaults
|
||||
args.figure_width = 10
|
||||
args.dpi_factor = 1.0
|
||||
|
||||
return args
class PifPaf:
|
||||
def __init__(self, args):
"""Instanciate the mdodel"""
factory_from_args(args)
|
||||
model_pifpaf, _ = nets.factory_from_args(args)
|
||||
model_pifpaf = model_pifpaf.to(args.device)
|
||||
self.processor = decoder.factory_from_args(args, model_pifpaf)
|
||||
self.keypoints_whole = []
# Scale the keypoints to the original image size for printing (if not webcam)
|
||||
self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
|
||||
|
||||
def fields(self, processed_images):
|
||||
"""Encoder for pif and paf fields"""
|
||||
fields_batch = self.processor.fields(processed_images)
|
||||
return fields_batch
|
||||
|
||||
def forward(self, image, processed_image_cpu, fields):
|
||||
"""Decoder, from pif and paf fields to keypoints"""
|
||||
self.processor.set_cpu_image(image, processed_image_cpu)
|
||||
keypoint_sets, scores = self.processor.keypoint_sets(fields)
if keypoint_sets.size > 0:
|
||||
self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1)
|
||||
.reshape(keypoint_sets.shape[0], -1).tolist())
pifpaf_out = [
|
||||
{'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
|
||||
'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0],
|
||||
np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]}
|
||||
for kps in keypoint_sets
|
||||
]
|
||||
return keypoint_sets, scores, pifpaf_out
|
||||
@ -82,7 +82,7 @@ def factory_for_gt(im_size, name=None, path_gt=None, verbose=True):
|
||||
dic_gt = None
|
||||
x_factor = im_size[0] / 1600
|
||||
y_factor = im_size[1] / 900
|
||||
pixel_factor = (x_factor + y_factor) / 2 # 1.7 for MOT
|
||||
pixel_factor = (x_factor + y_factor) / 1.75 # 1.75 for MOT
|
||||
# pixel_factor = 1
|
||||
if im_size[0] / im_size[1] > 2.5:
|
||||
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
|
||||
@ -274,7 +274,6 @@ def extract_outputs(outputs, tasks=()):
if outputs.shape[1] == 10:
|
||||
dic_out['aux'] = torch.sigmoid(dic_out['aux'])
|
||||
|
||||
return dic_out
@ -2,144 +2,171 @@
|
||||
# pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable
|
||||
|
||||
import os
|
||||
import glob
|
||||
import json
|
||||
import logging
|
||||
from collections import defaultdict
import torch
|
||||
from PIL import Image
|
||||
import PIL
|
||||
import openpifpaf
|
||||
import openpifpaf.datasets as datasets
|
||||
from openpifpaf.predict import processor_factory, preprocess_factory
|
||||
from openpifpaf import decoder, network, visualizer, show
|
||||
|
||||
from .visuals.printer import Printer
|
||||
from .visuals.pifpaf_show import KeypointPainter, image_canvas
|
||||
from .network import PifPaf, ImageList, Loco
|
||||
from .network import Loco
|
||||
from .network.process import factory_for_gt, preprocess_pifpaf
|
||||
from .activity import show_social
LOG = logging.getLogger(__name__)
def factory_from_args(args):
|
||||
|
||||
# Data
|
||||
if args.glob:
|
||||
args.images += glob.glob(args.glob)
|
||||
if not args.images:
|
||||
raise Exception("no image files given")
|
||||
|
||||
# Model
|
||||
if not args.checkpoint:
|
||||
args.checkpoint = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl' # Default model
|
||||
|
||||
# Devices
|
||||
args.device = torch.device('cpu')
|
||||
args.disable_cuda = False
|
||||
args.pin_memory = False
|
||||
if torch.cuda.is_available():
|
||||
args.device = torch.device('cuda')
|
||||
args.pin_memory = True
|
||||
args.loader_workers = 8
|
||||
|
||||
# Add visualization defaults
|
||||
args.figure_width = 10
|
||||
args.dpi_factor = 1.0
|
||||
|
||||
if args.net == 'monstereo':
|
||||
args.batch_size = 2
|
||||
else:
|
||||
args.batch_size = 1
|
||||
|
||||
# Make default pifpaf argument
|
||||
args.force_complete_pose = True
|
||||
print("Force complete pose is active")
|
||||
|
||||
# Configure
|
||||
decoder.configure(args)
|
||||
network.configure(args)
|
||||
show.configure(args)
|
||||
visualizer.configure(args)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def predict(args):
|
||||
|
||||
cnt = 0
|
||||
args = factory_from_args(args)
|
||||
|
||||
# Load Models
|
||||
pifpaf = PifPaf(args)
|
||||
assert args.mode in ('mono', 'stereo', 'pifpaf')
|
||||
assert args.net in ('monoloco_pp', 'monstereo', 'pifpaf')
|
||||
|
||||
if 'mono' in args.mode:
|
||||
monoloco = Loco(model=args.model, net='monoloco_pp',
|
||||
device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||
|
||||
if 'stereo' in args.mode:
|
||||
monstereo = Loco(model=args.model, net='monstereo',
|
||||
device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||
if args.net in ('monoloco_pp', 'monstereo'):
|
||||
net = Loco(model=args.model, net=args.net, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||
|
||||
# data
|
||||
data = ImageList(args.images, scale=args.scale)
|
||||
if args.mode == 'stereo':
|
||||
processor, model = processor_factory(args)
|
||||
preprocess = preprocess_factory(args)
|
||||
|
||||
# data
|
||||
data = datasets.ImageList(args.images, preprocess=preprocess)
|
||||
if args.net == 'monstereo':
|
||||
assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
|
||||
bs = 2
|
||||
else:
|
||||
bs = 1
|
||||
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
data, batch_size=bs, shuffle=False,
|
||||
pin_memory=args.pin_memory, num_workers=args.loader_workers)
|
||||
data, batch_size=args.batch_size, shuffle=False,
|
||||
pin_memory=False, collate_fn=datasets.collate_images_anns_meta)
|
||||
|
||||
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
|
||||
images = image_tensors.permute(0, 2, 3, 1)
|
||||
# visualizers
|
||||
annotation_painter = openpifpaf.show.AnnotationPainter()
|
||||
|
||||
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
|
||||
fields_batch = pifpaf.fields(processed_images)
|
||||
for batch_i, (image_tensors_batch, _, meta_batch) in enumerate(data_loader):
|
||||
pred_batch = processor.batch(model, image_tensors_batch, device=args.device)
|
||||
|
||||
# unbatch stereo pair
|
||||
for ii, (image_path, image, processed_image_cpu, fields) in enumerate(zip(
|
||||
image_paths, images, processed_images_cpu, fields_batch)):
|
||||
# unbatch (only for MonStereo)
|
||||
for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
|
||||
LOG.info('batch %d: %s', batch_i, meta['file_name'])
|
||||
pred = preprocess.annotations_inverse(pred, meta)
|
||||
|
||||
if args.output_directory is None:
|
||||
splits = os.path.split(image_paths[0])
|
||||
splits = os.path.split(meta['file_name'])
|
||||
output_path = os.path.join(splits[0], 'out_' + splits[1])
|
||||
else:
|
||||
file_name = os.path.basename(image_paths[0])
|
||||
file_name = os.path.basename(meta['file_name'])
|
||||
output_path = os.path.join(args.output_directory, 'out_' + file_name)
|
||||
print('image', idx, image_path, output_path)
|
||||
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
|
||||
print('image', batch_i, meta['file_name'], output_path)
|
||||
pifpaf_out = [ann.json_data() for ann in pred]
|
||||
|
||||
if ii == 0:
|
||||
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
|
||||
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor and monoloco original image
|
||||
if idx == 0:
|
||||
pifpaf_outputs = pred # to only print left image for stereo
|
||||
pifpaf_outs = {'left': pifpaf_out}
|
||||
image_path_l = image_path
|
||||
with open(meta_batch[0]['file_name'], 'rb') as f:
|
||||
cpu_image = PIL.Image.open(f).convert('RGB')
|
||||
else:
|
||||
pifpaf_outs['right'] = pifpaf_out
|
||||
|
||||
if args.mode in ('stereo', 'mono'):
|
||||
# Extract calibration matrix and ground truth file if present
|
||||
with open(image_path_l, 'rb') as f:
|
||||
pil_image = Image.open(f).convert('RGB')
|
||||
images_outputs.append(pil_image)
|
||||
# 3D Predictions
|
||||
if args.net in ('monoloco_pp', 'monstereo'):
|
||||
|
||||
im_name = os.path.basename(image_path_l)
|
||||
im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale)) # Original
|
||||
im_name = os.path.basename(meta['file_name'])
|
||||
im_size = (cpu_image.size[0], cpu_image.size[1]) # Original
|
||||
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
|
||||
|
||||
# Preprocess pifpaf outputs and run monoloco
|
||||
boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False)
|
||||
|
||||
if args.mode == 'mono':
|
||||
if args.net == 'monoloco_pp':
|
||||
print("Prediction with MonoLoco++")
|
||||
dic_out = monoloco.forward(keypoints, kk)
|
||||
dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
||||
dic_out = net.forward(keypoints, kk)
|
||||
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=not args.social_distance)
|
||||
|
||||
if args.social_distance:
|
||||
show_social(args, cpu_image, output_path, pifpaf_out, dic_out)
|
||||
|
||||
else:
|
||||
print("Prediction with MonStereo")
|
||||
boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
|
||||
dic_out = monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
|
||||
dic_out = monstereo.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
||||
dic_out = net.forward(keypoints, kk, keypoints_r=keypoints_r)
|
||||
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
||||
|
||||
else:
|
||||
dic_out = defaultdict(list)
|
||||
kk = None
|
||||
|
||||
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
|
||||
if not args.social_distance:
|
||||
factory_outputs(args, annotation_painter, cpu_image, output_path, pifpaf_outputs,
|
||||
dic_out=dic_out, kk=kk)
|
||||
print('Image {}\n'.format(cnt) + '-' * 120)
|
||||
cnt += 1
|
||||
|
||||
|
||||
def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
|
||||
def factory_outputs(args, annotation_painter, cpu_image, output_path, pred, dic_out=None, kk=None):
|
||||
"""Output json files or images according to the choice"""
|
||||
|
||||
# Save json file
|
||||
if args.mode == 'pifpaf':
|
||||
keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
|
||||
if args.net == 'pifpaf':
|
||||
with openpifpaf.show.image_canvas(cpu_image, output_path) as ax:
|
||||
annotation_painter.annotations(ax, pred)
|
||||
|
||||
# Visualizer
|
||||
keypoint_painter = KeypointPainter(show_box=False)
|
||||
skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4)
|
||||
|
||||
if 'json' in args.output_types and keypoint_sets.size > 0:
|
||||
with open(output_path + '.pifpaf.json', 'w') as f:
|
||||
json.dump(pifpaf_out, f)
|
||||
|
||||
if 'keypoints' in args.output_types:
|
||||
with image_canvas(images_outputs[0],
|
||||
output_path + '.keypoints.png',
|
||||
show=args.show,
|
||||
fig_width=args.figure_width,
|
||||
dpi_factor=args.dpi_factor) as ax:
|
||||
keypoint_painter.keypoints(ax, keypoint_sets)
|
||||
|
||||
if 'skeleton' in args.output_types:
|
||||
with image_canvas(images_outputs[0],
|
||||
output_path + '.skeleton.png',
|
||||
show=args.show,
|
||||
fig_width=args.figure_width,
|
||||
dpi_factor=args.dpi_factor) as ax:
|
||||
skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
|
||||
|
||||
else:
|
||||
if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
|
||||
print(output_path)
|
||||
if dic_out['boxes']: # Only print in case of detections
|
||||
printer = Printer(images_outputs[1], output_path, kk, args)
|
||||
figures, axes = printer.factory_axes()
|
||||
printer.draw(figures, axes, dic_out, images_outputs[1])
|
||||
printer = Printer(cpu_image, output_path, kk, args)
|
||||
figures, axes = printer.factory_axes(dic_out)
|
||||
printer.draw(figures, axes, cpu_image)
|
||||
|
||||
if 'json' in args.output_types:
|
||||
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
|
||||
|
||||
@ -24,10 +24,9 @@ from .transforms import flip_inputs, flip_labels, height_augmentation
|
||||
class PreprocessKitti:
|
||||
"""Prepare arrays with same format as nuScenes preprocessing but using ground truth txt files"""
|
||||
|
||||
# AV_W = 0.68
|
||||
# AV_L = 0.75
|
||||
# AV_H = 1.72
|
||||
# WLH_STD = 0.1
|
||||
dir_gt = os.path.join('data', 'kitti', 'gt')
|
||||
dir_images = '/data/lorenzo-data/kitti/original_images/training/image_2'
|
||||
dir_byc_l = '/data/lorenzo-data/kitti/object_detection/left'
|
||||
|
||||
# SOCIAL DISTANCING PARAMETERS
|
||||
THRESHOLD_DIST = 2 # Threshold to check distance of people
|
||||
@ -51,9 +50,6 @@ class PreprocessKitti:
|
||||
self.dir_ann = dir_ann
|
||||
self.iou_min = iou_min
|
||||
self.monocular = monocular
|
||||
self.dir_gt = os.path.join('data', 'kitti', 'gt')
|
||||
self.dir_images = '/data/lorenzo-data/kitti/original_images/training/image_2'
|
||||
self.dir_byc_l = '/data/lorenzo-data/kitti/object_detection/left'
|
||||
self.names_gt = tuple(os.listdir(self.dir_gt))
|
||||
self.dir_kk = os.path.join('data', 'kitti', 'calib')
|
||||
self.list_gt = glob.glob(self.dir_gt + '/*.txt')
|
||||
@ -97,7 +93,9 @@ class PreprocessKitti:
|
||||
category = 'pedestrian'
|
||||
|
||||
# Extract ground truth
|
||||
boxes_gt, ys, _, _ = parse_ground_truth(path_gt, category=category, spherical=True)
|
||||
boxes_gt, ys, _, _ = parse_ground_truth(path_gt, # pylint: disable=unbalanced-tuple-unpacking
|
||||
category=category,
|
||||
spherical=True)
|
||||
cnt_gt[phase] += len(boxes_gt)
|
||||
cnt_files += 1
|
||||
cnt_files_ped += min(len(boxes_gt), 1) # if no boxes 0 else 1
|
||||
@ -170,7 +168,7 @@ class PreprocessKitti:
|
||||
self.dic_jo[phase]['X'].append(inp)
|
||||
self.dic_jo[phase]['Y'].append(lab)
|
||||
self.dic_jo[phase]['names'].append(name) # One image name for each annotation
|
||||
append_cluster(self.dic_jo, phase, inp, lab, keypoint)
|
||||
append_cluster(self.dic_jo, phase, inp, lab, keypoint.tolist())
|
||||
cnt_mono[phase] += 1
|
||||
cnt_tot += 1
|
||||
|
||||
|
||||
@ -87,7 +87,7 @@ class PreprocessNuscenes:
|
||||
while not current_token == "":
|
||||
sample_dic = self.nusc.get('sample', current_token)
|
||||
cnt_samples += 1
|
||||
|
||||
# if (cnt_samples % 4 == 0) and (cnt_ann < 3000):
|
||||
# Extract all the sample_data tokens for each sample
|
||||
for cam in self.CAMERAS:
|
||||
sd_token = sample_dic['data'][cam]
|
||||
@ -105,7 +105,7 @@ class PreprocessNuscenes:
|
||||
self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)
|
||||
|
||||
# Run IoU with pifpaf detections and save
|
||||
path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json')
|
||||
path_pif = os.path.join(self.dir_ann, name + '.predictions.json')
|
||||
exists = os.path.isfile(path_pif)
|
||||
|
||||
if exists:
|
||||
@ -114,7 +114,6 @@ class PreprocessNuscenes:
|
||||
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
|
||||
else:
|
||||
continue
|
||||
|
||||
if keypoints:
|
||||
matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
|
||||
for (idx, idx_gt) in matches:
|
||||
@ -130,7 +129,6 @@ class PreprocessNuscenes:
|
||||
append_cluster(self.dic_jo, phase, inp, lab, keypoint)
|
||||
cnt_ann += 1
|
||||
sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')
|
||||
|
||||
current_token = sample_dic['next']
|
||||
|
||||
with open(os.path.join(self.path_joints), 'w') as f:
|
||||
@ -139,7 +137,7 @@ class PreprocessNuscenes:
|
||||
json.dump(self.dic_names, f)
|
||||
end = time.time()
|
||||
|
||||
extract_box_average(self.dic_jo['train']['boxes_3d'])
|
||||
# extract_box_average(self.dic_jo['train']['boxes_3d'])
|
||||
print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
|
||||
.format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60))
|
||||
print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))
|
||||
|
||||
@ -2,8 +2,7 @@
|
||||
|
||||
import argparse
|
||||
|
||||
from openpifpaf.network import nets
|
||||
from openpifpaf import decoder
|
||||
from openpifpaf import decoder, network, visualizer, show
|
||||
|
||||
|
||||
def cli():
|
||||
@ -37,15 +36,18 @@ def cli():
|
||||
help='what to output: json keypoints skeleton for Pifpaf'
|
||||
'json bird front or multi for MonStereo')
|
||||
predict_parser.add_argument('--no_save', help='to show images', action='store_true')
|
||||
predict_parser.add_argument('--show', help='to show images', action='store_true')
|
||||
predict_parser.add_argument('--dpi', help='image resolution', type=int, default=100)
|
||||
predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150)
|
||||
predict_parser.add_argument('--long-edge', default=None, type=int,
|
||||
help='rescale the long side of the image (aspect ratio maintained)')
|
||||
|
||||
# Pifpaf
|
||||
nets.cli(predict_parser)
|
||||
decoder.cli(predict_parser, force_complete_pose=True, instance_threshold=0.15)
|
||||
predict_parser.add_argument('--scale', default=1.0, type=float, help='change the scale of the image to preprocess')
|
||||
# Pifpaf parsers
|
||||
decoder.cli(predict_parser)
|
||||
network.cli(predict_parser)
|
||||
show.cli(predict_parser)
|
||||
visualizer.cli(predict_parser)
|
||||
|
||||
# Monoloco
|
||||
predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
|
||||
predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
|
||||
predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
|
||||
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
|
||||
@ -57,18 +59,15 @@ def cli():
|
||||
predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
|
||||
|
||||
# Social distancing and social interactions
|
||||
predict_parser.add_argument('--social', help='social', action='store_true')
|
||||
predict_parser.add_argument('--activity', help='activity', action='store_true')
|
||||
predict_parser.add_argument('--json_dir', help='for social')
|
||||
predict_parser.add_argument('--social_distance', help='social', action='store_true')
|
||||
predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
|
||||
predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2)
|
||||
predict_parser.add_argument('--margin', type=float, help='conservative for noise in orientation', default=1.5)
|
||||
predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.25, 1, 2))
|
||||
predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2.5)
|
||||
predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.3, 0.5, 1))
|
||||
|
||||
# Training
|
||||
training_parser.add_argument('--joints', help='Json file with input joints',
|
||||
default='data/arrays/joints-nuscenes_teaser-190513-1846.json')
|
||||
training_parser.add_argument('--save', help='whether to not save model and log file', action='store_true')
|
||||
training_parser.add_argument('--no_save', help='to not save model and log file', action='store_true')
|
||||
training_parser.add_argument('-e', '--epochs', type=int, help='number of epochs to train for', default=500)
|
||||
training_parser.add_argument('--bs', type=int, default=512, help='input batch size')
|
||||
training_parser.add_argument('--monocular', help='whether to train monoloco', action='store_true')
|
||||
@ -81,7 +80,9 @@ def cli():
|
||||
training_parser.add_argument('--hyp', help='run hyperparameters tuning', action='store_true')
|
||||
training_parser.add_argument('--multiplier', type=int, help='Size of the grid of hyp search', default=1)
|
||||
training_parser.add_argument('--r_seed', type=int, help='specify the seed for training and hyp tuning', default=1)
|
||||
training_parser.add_argument('--activity', help='new', action='store_true')
|
||||
training_parser.add_argument('--print_loss', help='print training and validation losses', action='store_true')
|
||||
training_parser.add_argument('--auto_tune_mtl', help='whether to use uncertainty to autotune losses',
|
||||
action='store_true')
|
||||
|
||||
# Evaluation
|
||||
eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti')
|
||||
@ -102,6 +103,9 @@ def cli():
|
||||
eval_parser.add_argument('--variance', help='evaluate keypoints variance', action='store_true')
|
||||
eval_parser.add_argument('--activity', help='evaluate activities', action='store_true')
|
||||
eval_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
|
||||
eval_parser.add_argument('--baselines', help='whether to evaluate stereo baselines', action='store_true')
|
||||
eval_parser.add_argument('--generate_official', help='whether to add empty txt files for official evaluation',
|
||||
action='store_true')
|
||||
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
@ -110,10 +114,7 @@ def cli():
|
||||
def main():
|
||||
args = cli()
|
||||
if args.command == 'predict':
|
||||
if args.activity:
|
||||
from .activity import predict
|
||||
else:
|
||||
from .predict import predict
|
||||
from .predict import predict
|
||||
predict(args)
|
||||
|
||||
elif args.command == 'prep':
|
||||
@ -135,14 +136,11 @@ def main():
|
||||
hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs,
|
||||
monocular=args.monocular, dropout=args.dropout,
|
||||
multiplier=args.multiplier, r_seed=args.r_seed)
|
||||
hyp_tuning.train()
|
||||
hyp_tuning.train(args)
|
||||
else:
|
||||
|
||||
from .train import Trainer
|
||||
training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs,
|
||||
monocular=args.monocular, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step,
|
||||
n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size,
|
||||
r_seed=args.r_seed, save=args.save)
|
||||
training = Trainer(args)
|
||||
|
||||
_ = training.train()
|
||||
_ = training.evaluate()
|
||||
@ -169,19 +167,18 @@ def main():
|
||||
else:
|
||||
if args.generate:
|
||||
from .eval.generate_kitti import GenerateKitti
|
||||
kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout,
|
||||
hidden_size=args.hidden_size)
|
||||
kitti_txt = GenerateKitti(args)
|
||||
kitti_txt.run()
|
||||
|
||||
if args.dataset == 'kitti':
|
||||
from .eval import EvalKitti
|
||||
kitti_eval = EvalKitti(verbose=args.verbose)
|
||||
kitti_eval = EvalKitti(args)
|
||||
kitti_eval.run()
|
||||
kitti_eval.printer(show=args.show, save=args.save)
|
||||
kitti_eval.printer()
|
||||
|
||||
elif 'nuscenes' in args.dataset:
|
||||
from .train import Trainer
|
||||
training = Trainer(joints=args.joints, hidden_size=args.hidden_size)
|
||||
training = Trainer(args)
|
||||
_ = training.evaluate(load=True, model=args.model, debug=False)
|
||||
|
||||
else:
|
||||
|
||||
@ -61,7 +61,7 @@ class HypTuning:
|
||||
# plt.hist(self.lr_list, bins=50)
|
||||
# plt.show()
|
||||
|
||||
def train(self):
|
||||
def train(self, args):
|
||||
"""Train multiple times using log-space random search"""
|
||||
|
||||
best_acc_val = 20
|
||||
@ -76,10 +76,7 @@ class HypTuning:
|
||||
hidden_size = self.hidden_list[idx]
|
||||
n_stage = self.n_stage_list[idx]
|
||||
|
||||
training = Trainer(joints=self.joints, epochs=self.num_epochs,
|
||||
bs=bs, monocular=self.monocular, dropout=self.dropout, lr=lr, sched_step=sched_step,
|
||||
sched_gamma=sched_gamma, hidden_size=hidden_size, n_stage=n_stage,
|
||||
save=False, print_loss=False, r_seed=self.r_seed)
|
||||
training = Trainer(args)
|
||||
|
||||
best_epoch = training.train()
|
||||
dic_err, model = training.evaluate()
|
||||
|
||||
@ -27,7 +27,7 @@ class AutoTuneMultiTaskLoss(torch.nn.Module):
|
||||
loss_values = [lam * l(o, g) / (2.0 * (log_sigma.exp() ** 2))
|
||||
for lam, log_sigma, l, o, g in zip(self.lambdas, self.log_sigmas, self.losses, out, gt_out)]
|
||||
|
||||
auto_reg = [log_sigma for log_sigma in self.log_sigmas]
|
||||
auto_reg = [log_sigma for log_sigma in self.log_sigmas] # pylint: disable=unnecessary-comprehension
|
||||
|
||||
loss = sum(loss_values) + sum(auto_reg)
|
||||
if phase == 'val':
|
||||
@ -70,7 +70,7 @@ class MultiTaskLoss(torch.nn.Module):
|
||||
class CompositeLoss(torch.nn.Module):
|
||||
|
||||
def __init__(self, tasks):
|
||||
super(CompositeLoss, self).__init__()
|
||||
super().__init__()
|
||||
|
||||
self.tasks = tasks
|
||||
self.multi_loss_tr = {task: (LaplacianLoss() if task == 'd'
|
||||
@ -98,7 +98,7 @@ class CompositeLoss(torch.nn.Module):
|
||||
class LaplacianLoss(torch.nn.Module):
|
||||
"""1D Gaussian with std depending on the absolute distance"""
|
||||
def __init__(self, size_average=True, reduce=True, evaluate=False):
|
||||
super(LaplacianLoss, self).__init__()
|
||||
super().__init__()
|
||||
self.size_average = size_average
|
||||
self.reduce = reduce
self.evaluate = evaluate
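# Hedged sketch of a Laplace negative log-likelihood for distance regression, the idea behind
# this loss; the names (mu, log_b) and the exact normalization are assumptions and may differ
# from the repo's LaplacianLoss.
import torch

def laplace_nll_sketch(mu, log_b, target):
    """Per-sample |target - mu| / b + log(b), with the scale b parametrized as exp(log_b)."""
    return torch.abs(target - mu) * torch.exp(-log_b) + log_b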
@ -140,7 +140,7 @@ class GaussianLoss(torch.nn.Module):
|
||||
"""1D Gaussian with std depending on the absolute distance
|
||||
"""
|
||||
def __init__(self, device, size_average=True, reduce=True, evaluate=False):
|
||||
super(GaussianLoss, self).__init__()
|
||||
super().__init__()
|
||||
self.size_average = size_average
|
||||
self.reduce = reduce
|
||||
self.evaluate = evaluate
|
||||
|
||||
@ -34,10 +34,9 @@ class Trainer:
|
||||
tasks = ('d', 'x', 'y', 'h', 'w', 'l', 'ori', 'aux')
|
||||
val_task = 'd'
|
||||
lambdas = (1, 1, 1, 1, 1, 1, 1, 1)
|
||||
clusters = ['10', '20', '30', '40']
|
||||
|
||||
def __init__(self, joints, epochs=100, bs=256, dropout=0.2, lr=0.002,
|
||||
sched_step=20, sched_gamma=1, hidden_size=256, n_stage=3, r_seed=1, n_samples=100,
|
||||
monocular=False, save=False, print_loss=True):
|
||||
def __init__(self, args):
|
||||
"""
|
||||
Initialize directories, load the data and parameters for the training
|
||||
"""
|
||||
@ -49,31 +48,29 @@ class Trainer:
|
||||
dir_logs = os.path.join('data', 'logs')
|
||||
if not os.path.exists(dir_logs):
|
||||
warnings.warn("Warning: default logs directory not found")
|
||||
assert os.path.exists(joints), "Input file not found"
|
||||
assert os.path.exists(args.joints), "Input file not found"
|
||||
|
||||
self.joints = joints
|
||||
self.num_epochs = epochs
|
||||
self.save = save
|
||||
self.print_loss = print_loss
|
||||
self.monocular = monocular
|
||||
self.lr = lr
|
||||
self.sched_step = sched_step
|
||||
self.sched_gamma = sched_gamma
|
||||
self.clusters = ['10', '20', '30', '50', '>50']
|
||||
self.hidden_size = hidden_size
|
||||
self.n_stage = n_stage
|
||||
self.joints = args.joints
|
||||
self.num_epochs = args.epochs
|
||||
self.no_save = args.no_save
|
||||
self.print_loss = args.print_loss
|
||||
self.monocular = args.monocular
|
||||
self.lr = args.lr
|
||||
self.sched_step = args.sched_step
|
||||
self.sched_gamma = args.sched_gamma
|
||||
self.hidden_size = args.hidden_size
|
||||
self.n_stage = args.n_stage
|
||||
self.dir_out = dir_out
|
||||
self.n_samples = n_samples
|
||||
self.r_seed = r_seed
|
||||
self.auto_tune_mtl = False
|
||||
self.r_seed = args.r_seed
|
||||
self.auto_tune_mtl = args.auto_tune_mtl
|
||||
|
||||
# Select the device
|
||||
use_cuda = torch.cuda.is_available()
|
||||
self.device = torch.device("cuda" if use_cuda else "cpu")
|
||||
print('Device: ', self.device)
|
||||
torch.manual_seed(r_seed)
|
||||
torch.manual_seed(self.r_seed)
|
||||
if use_cuda:
|
||||
torch.cuda.manual_seed(r_seed)
|
||||
torch.cuda.manual_seed(self.r_seed)
|
||||
|
||||
# Remove auxiliary task if monocular
|
||||
if self.monocular and self.tasks[-1] == 'aux':
|
||||
@ -95,25 +92,28 @@ class Trainer:
|
||||
input_size = 34
|
||||
output_size = 9
|
||||
|
||||
name = 'monoloco_pp' if self.monocular else 'monstereo'
|
||||
now = datetime.datetime.now()
|
||||
now_time = now.strftime("%Y%m%d-%H%M")[2:]
|
||||
name_out = 'monstereo-' + now_time
|
||||
if self.save:
|
||||
name_out = name + '-' + now_time
|
||||
if not self.no_save:
|
||||
self.path_model = os.path.join(dir_out, name_out + '.pkl')
|
||||
self.logger = set_logger(os.path.join(dir_logs, name_out))
|
||||
self.logger.info("Training arguments: \nepochs: {} \nbatch_size: {} \ndropout: {}"
|
||||
"\nmonocular: {} \nlearning rate: {} \nscheduler step: {} \nscheduler gamma: {} "
|
||||
"\ninput_size: {} \noutput_size: {}\nhidden_size: {} \nn_stages: {} "
|
||||
"\nr_seed: {} \nlambdas: {} \ninput_file: {}"
|
||||
.format(epochs, bs, dropout, self.monocular, lr, sched_step, sched_gamma, input_size,
|
||||
output_size, hidden_size, n_stage, r_seed, self.lambdas, self.joints))
|
||||
.format(args.epochs, args.bs, args.dropout, self.monocular,
|
||||
args.lr, args.sched_step, args.sched_gamma, input_size,
|
||||
output_size, args.hidden_size, args.n_stage, args.r_seed,
|
||||
self.lambdas, self.joints))
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Dataloader
|
||||
self.dataloaders = {phase: DataLoader(KeypointsDataset(self.joints, phase=phase),
|
||||
batch_size=bs, shuffle=True) for phase in ['train', 'val']}
|
||||
batch_size=args.bs, shuffle=True) for phase in ['train', 'val']}
|
||||
|
||||
self.dataset_sizes = {phase: len(KeypointsDataset(self.joints, phase=phase))
|
||||
for phase in ['train', 'val']}
|
||||
@ -122,15 +122,16 @@ class Trainer:
|
||||
self.logger.info('Sizes of the dataset: {}'.format(self.dataset_sizes))
|
||||
print(">>> creating model")
|
||||
|
||||
self.model = MonStereoModel(input_size=input_size, output_size=output_size, linear_size=hidden_size,
|
||||
p_dropout=dropout, num_stage=self.n_stage, device=self.device)
|
||||
self.model = MonStereoModel(input_size=input_size, output_size=output_size, linear_size=args.hidden_size,
|
||||
p_dropout=args.dropout, num_stage=self.n_stage, device=self.device)
|
||||
self.model.to(self.device)
|
||||
print(">>> model params: {:.3f}M".format(sum(p.numel() for p in self.model.parameters()) / 1000000.0))
|
||||
print(">>> loss params: {}".format(sum(p.numel() for p in self.mt_loss.parameters())))
|
||||
|
||||
# Optimizer and scheduler
|
||||
all_params = chain(self.model.parameters(), self.mt_loss.parameters())
|
||||
self.optimizer = torch.optim.Adam(params=all_params, lr=lr)
|
||||
self.optimizer = torch.optim.Adam(params=all_params, lr=args.lr)
|
||||
self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')
|
||||
self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=self.sched_step, gamma=self.sched_gamma)
|
||||
|
||||
def train(self):
|
||||
@ -155,11 +156,11 @@ class Trainer:
|
||||
labels = labels.to(self.device)
|
||||
with torch.set_grad_enabled(phase == 'train'):
|
||||
if phase == 'train':
|
||||
self.optimizer.zero_grad()
|
||||
outputs = self.model(inputs)
|
||||
loss, loss_values = self.mt_loss(outputs, labels, phase=phase)
|
||||
self.optimizer.zero_grad()
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(self.model.parameters(), 2)
|
||||
torch.nn.utils.clip_grad_norm_(self.model.parameters(), 3)
|
||||
self.optimizer.step()
|
||||
self.scheduler.step()
|
||||
|
||||
@ -242,7 +243,7 @@ class Trainer:
|
||||
self.cout_stats(dic_err['val'], size_eval, clst=clst)
|
||||
|
||||
# Save the model and the results
|
||||
if self.save and not load:
|
||||
if not (self.no_save or load):
|
||||
torch.save(self.model.state_dict(), self.path_model)
|
||||
print('-' * 120)
|
||||
self.logger.info("\nmodel saved: {} \n".format(self.path_model))
|
||||
@ -264,7 +265,6 @@ class Trainer:
|
||||
|
||||
# Distance
|
||||
errs = torch.abs(extract_outputs(outputs)['d'] - extract_labels(labels)['d'])
|
||||
|
||||
assert rel_frac > 0.99, "Variance of errors not supported with partial evaluation"
|
||||
|
||||
# Uncertainty
|
||||
|
||||
@ -57,7 +57,7 @@ def get_iou_matches(boxes, boxes_gt, iou_min=0.3):
|
||||
ious.append(iou)
|
||||
idx_gt_max = int(np.argmax(ious))
|
||||
if (ious[idx_gt_max] >= iou_min) and (idx_gt_max not in used):
|
||||
matches.append((idx, idx_gt_max))
|
||||
matches.append((int(idx), idx_gt_max))
|
||||
used.append(idx_gt_max)
|
||||
return matches
|
||||
|
||||
@ -93,6 +93,6 @@ def reorder_matches(matches, boxes, mode='left_rigth'):
# Order the boxes based on the left-right position in the image
ordered_boxes = np.argsort([box[0] for box in boxes]) # indices of boxes ordered from left to right
|
||||
matches_left = [idx for (idx, _) in matches]
|
||||
matches_left = [int(idx) for (idx, _) in matches]
|
||||
|
||||
return [matches[matches_left.index(idx_boxes)] for idx_boxes in ordered_boxes if idx_boxes in matches_left]
|
||||
|
||||
@ -199,11 +199,11 @@ def factory_file(path_calib, dir_ann, basename, mode='left'):
|
||||
|
||||
if mode == 'left':
|
||||
kk, tt = p_left[:]
|
||||
path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json')
|
||||
path_ann = os.path.join(dir_ann, basename + '.png.predictions.json')
|
||||
|
||||
else:
|
||||
kk, tt = p_right[:]
|
||||
path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json')
|
||||
path_ann = os.path.join(dir_ann + '_right', basename + '.png.predictions.json')
|
||||
|
||||
from ..utils import open_annotations
|
||||
annotations = open_annotations(path_ann)
|
||||
|
||||
@ -20,14 +20,14 @@ def append_cluster(dic_jo, phase, xx, ys, kps):
|
||||
dic_jo[phase]['clst']['30']['kps'].append(kps)
|
||||
dic_jo[phase]['clst']['30']['X'].append(xx)
|
||||
dic_jo[phase]['clst']['30']['Y'].append(ys)
|
||||
elif ys[3] < 50:
|
||||
dic_jo[phase]['clst']['50']['kps'].append(kps)
|
||||
dic_jo[phase]['clst']['50']['X'].append(xx)
|
||||
dic_jo[phase]['clst']['50']['Y'].append(ys)
|
||||
elif ys[3] <= 40:
|
||||
dic_jo[phase]['clst']['40']['kps'].append(kps)
|
||||
dic_jo[phase]['clst']['40']['X'].append(xx)
|
||||
dic_jo[phase]['clst']['40']['Y'].append(ys)
|
||||
else:
|
||||
dic_jo[phase]['clst']['>50']['kps'].append(kps)
|
||||
dic_jo[phase]['clst']['>50']['X'].append(xx)
|
||||
dic_jo[phase]['clst']['>50']['Y'].append(ys)
|
||||
dic_jo[phase]['clst']['>40']['kps'].append(kps)
|
||||
dic_jo[phase]['clst']['>40']['X'].append(xx)
|
||||
dic_jo[phase]['clst']['>40']['Y'].append(ys)
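# Hedged sketch of the distance binning behind these evaluation clusters; the thresholds
# (10/20/30/40 m, then '>40') are inferred from this diff and from the trainer's clusters
# list, so treat the exact cut-offs as an assumption.
def cluster_key_sketch(distance):
    """Return the evaluation cluster key for a ground-truth distance in meters."""
    for threshold in (10, 20, 30, 40):
        if distance <= threshold:
            return str(threshold)
    return '>40'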
def get_task_error(dd):
|
||||
@ -58,7 +58,7 @@ def make_new_directory(dir_out):
|
||||
if os.path.exists(dir_out):
|
||||
shutil.rmtree(dir_out)
|
||||
os.makedirs(dir_out)
|
||||
print("Created empty output directory for {} txt files".format(dir_out))
|
||||
print("Created empty output directory {} ".format(dir_out))
|
||||
|
||||
|
||||
def normalize_hwl(lab):
|
||||
|
||||
@ -17,21 +17,22 @@ DPI = 200
|
||||
GRID_WIDTH = 0.5
|
||||
|
||||
|
||||
def show_results(dic_stats, clusters, dir_out='data/figures', show=False, save=False, stereo=True):
|
||||
def show_results(dic_stats, clusters, net, dir_fig, show=False, save=False):
|
||||
"""
Visualize error as a function of the distance and compare it with target errors based on human height analyses
"""
|
||||
|
||||
phase = 'test'
|
||||
x_min = 3
|
||||
x_max = 42
|
||||
# x_max = 42
|
||||
x_max = 31
|
||||
y_min = 0
|
||||
# y_max = 2.2
|
||||
y_max = 3.5 if stereo else 5.2
|
||||
y_max = 3.5 if net == 'monstereo' else 2.7
|
||||
xx = np.linspace(x_min, x_max, 100)
|
||||
excl_clusters = ['all', 'easy', 'moderate', 'hard']
|
||||
excl_clusters = ['all', 'easy', 'moderate', 'hard', '49']
|
||||
clusters = [clst for clst in clusters if clst not in excl_clusters]
|
||||
styles = printing_styles(stereo)
|
||||
styles = printing_styles(net)
|
||||
for idx_style, style in enumerate(styles.items()):
|
||||
plt.figure(idx_style, figsize=FIGSIZE)
|
||||
plt.grid(linewidth=GRID_WIDTH)
|
||||
@ -48,10 +49,10 @@ def show_results(dic_stats, clusters, dir_out='data/figures', show=False, save=F
|
||||
plt.plot(xxs, errs, marker=styles['mks'][idx], markersize=styles['mksizes'][idx],
|
||||
linewidth=styles['lws'][idx],
|
||||
label=styles['labels'][idx], linestyle=styles['lstyles'][idx], color=styles['colors'][idx])
|
||||
if method in ('monstereo', 'pseudo-lidar'):
|
||||
if method in ('monstereo', 'monoloco_pp', 'pseudo-lidar'):
|
||||
for i, x in enumerate(xxs):
|
||||
plt.text(x, errs[i], str(cnts[i]), fontsize=FONTSIZE)
|
||||
if not stereo:
|
||||
plt.text(x, errs[i] - 0.1, str(cnts[i]), fontsize=FONTSIZE)
|
||||
if net == 'monoloco_pp':
|
||||
plt.plot(xx, get_task_error(xx), '--', label="Task error", color='lightgreen', linewidth=2.5)
|
||||
# if stereo:
|
||||
# yy_stereo = get_pixel_error(xx)
|
||||
@ -62,61 +63,61 @@ def show_results(dic_stats, clusters, dir_out='data/figures', show=False, save=F
|
||||
plt.yticks(fontsize=FONTSIZE)
|
||||
if save:
|
||||
plt.tight_layout()
|
||||
mode = 'stereo' if stereo else 'mono'
|
||||
path_fig = os.path.join(dir_out, 'results_' + mode + '.png')
|
||||
path_fig = os.path.join(dir_fig, 'results_' + net + '.png')
|
||||
plt.savefig(path_fig, dpi=DPI)
|
||||
print("Figure of results " + mode + " saved in {}".format(path_fig))
|
||||
print("Figure of results " + net + " saved in {}".format(path_fig))
|
||||
if show:
|
||||
plt.show()
|
||||
plt.close('all')
|
||||
|
||||
|
||||
def show_spread(dic_stats, clusters, dir_out='data/figures', show=False, save=False):
|
||||
def show_spread(dic_stats, clusters, net, dir_fig, show=False, save=False):
|
||||
"""Predicted confidence intervals and task error as a function of ground-truth distance"""
|
||||
|
||||
assert net in ('monoloco_pp', 'monstereo'), "network not recognized"
|
||||
phase = 'test'
|
||||
excl_clusters = ['all', 'easy', 'moderate', 'hard']
|
||||
excl_clusters = ['all', 'easy', 'moderate', 'hard', '49']
|
||||
clusters = [clst for clst in clusters if clst not in excl_clusters]
|
||||
x_min = 3
|
||||
x_max = 42
|
||||
x_max = 31
|
||||
y_min = 0
|
||||
|
||||
for method in ('monoloco_pp', 'monstereo'):
|
||||
plt.figure(2, figsize=FIGSIZE)
|
||||
xxs = get_distances(clusters)
|
||||
bbs = np.array([dic_stats[phase][method][key]['std_ale'] for key in clusters[:-1]])
|
||||
if method == 'monoloco_pp':
|
||||
y_max = 5
|
||||
color = 'deepskyblue'
|
||||
epis = np.array([dic_stats[phase][method][key]['std_epi'] for key in clusters[:-1]])
|
||||
plt.plot(xxs, epis, marker='o', color='coral', label="Combined uncertainty (\u03C3)")
|
||||
else:
|
||||
y_max = 3.5
|
||||
color = 'b'
|
||||
plt.plot(xx, get_pixel_error(xx), linewidth=2.5, color='k', label='Pixel error')
|
||||
plt.plot(xxs, bbs, marker='s', color=color, label="Aleatoric uncertainty (b)", linewidth=4, markersize=8)
|
||||
xx = np.linspace(x_min, x_max, 100)
|
||||
plt.plot(xx, get_task_error(xx), '--', label="Task error (monocular bound)", color='lightgreen', linewidth=4)
|
||||
plt.figure(2, figsize=FIGSIZE)
|
||||
xxs = get_distances(clusters)
|
||||
bbs = np.array([dic_stats[phase][net][key]['std_ale'] for key in clusters[:-1]])
|
||||
xx = np.linspace(x_min, x_max, 100)
|
||||
if net == 'monoloco_pp':
|
||||
y_max = 2.7
|
||||
color = 'deepskyblue'
|
||||
epis = np.array([dic_stats[phase][net][key]['std_epi'] for key in clusters[:-1]])
|
||||
plt.plot(xxs, epis, marker='o', color='coral', linewidth=4, markersize=8, label="Combined uncertainty (\u03C3)")
|
||||
else:
|
||||
y_max = 3.5
|
||||
color = 'b'
|
||||
plt.plot(xx, get_pixel_error(xx), linewidth=2.5, color='k', label='Pixel error')
|
||||
plt.plot(xxs, bbs, marker='s', color=color, label="Aleatoric uncertainty (b)", linewidth=4, markersize=8)
|
||||
plt.plot(xx, get_task_error(xx), '--', label="Task error (monocular bound)", color='lightgreen', linewidth=4)
|
||||
|
||||
plt.xlabel("Ground-truth distance [m]", fontsize=FONTSIZE)
|
||||
plt.ylabel("Uncertainty [m]", fontsize=FONTSIZE)
|
||||
plt.xlim(x_min, x_max)
|
||||
plt.ylim(y_min, y_max)
|
||||
plt.grid(linewidth=GRID_WIDTH)
|
||||
plt.legend(prop={'size': FONTSIZE})
|
||||
plt.xticks(fontsize=FONTSIZE)
|
||||
plt.yticks(fontsize=FONTSIZE)
|
||||
if save:
|
||||
plt.tight_layout()
|
||||
path_fig = os.path.join(dir_out, 'spread_' + method + '.png')
|
||||
plt.savefig(path_fig, dpi=DPI)
|
||||
print("Figure of confidence intervals saved in {}".format(path_fig))
|
||||
if show:
|
||||
plt.show()
|
||||
plt.close('all')
|
||||
plt.xlabel("Ground-truth distance [m]", fontsize=FONTSIZE)
|
||||
plt.ylabel("Uncertainty [m]", fontsize=FONTSIZE)
|
||||
plt.xlim(x_min, x_max)
|
||||
plt.ylim(y_min, y_max)
|
||||
plt.grid(linewidth=GRID_WIDTH)
|
||||
plt.legend(prop={'size': FONTSIZE})
|
||||
plt.xticks(fontsize=FONTSIZE)
|
||||
plt.yticks(fontsize=FONTSIZE)
|
||||
|
||||
if save:
|
||||
plt.tight_layout()
|
||||
path_fig = os.path.join(dir_fig, 'spread_' + net + '.png')
|
||||
plt.savefig(path_fig, dpi=DPI)
|
||||
print("Figure of confidence intervals saved in {}".format(path_fig))
|
||||
if show:
|
||||
plt.show()
|
||||
plt.close('all')
|
||||
|
||||
|
||||
def show_task_error(show, save, dir_out='data/figures'):
|
||||
def show_task_error(dir_fig, show, save):
|
||||
"""Task error figure"""
|
||||
plt.figure(3, figsize=FIGSIZE)
|
||||
xx = np.linspace(0.1, 50, 100)
|
||||
@ -147,7 +148,7 @@ def show_task_error(show, save, dir_out='data/figures'):
|
||||
plt.xticks(fontsize=FONTSIZE)
|
||||
plt.yticks(fontsize=FONTSIZE)
|
||||
if save:
|
||||
path_fig = os.path.join(dir_out, 'task_error.png')
|
||||
path_fig = os.path.join(dir_fig, 'task_error.png')
|
||||
plt.savefig(path_fig, dpi=DPI)
|
||||
print("Figure of task error saved in {}".format(path_fig))
|
||||
if show:
|
||||
@ -181,7 +182,7 @@ def show_method(save, dir_out='data/figures'):
|
||||
plt.close('all')
|
||||
|
||||
|
||||
def show_box_plot(dic_errors, clusters, dir_out='data/figures', show=False, save=False):
|
||||
def show_box_plot(dic_errors, clusters, dir_fig, show=False, save=False):
|
||||
import pandas as pd
|
||||
excl_clusters = ['all', 'easy', 'moderate', 'hard']
|
||||
clusters = [int(clst) for clst in clusters if clst not in excl_clusters]
|
||||
@ -205,7 +206,7 @@ def show_box_plot(dic_errors, clusters, dir_out='data/figures', show=False, save
|
||||
plt.ylim(y_min, y_max)
|
||||
|
||||
if save:
|
||||
path_fig = os.path.join(dir_out, 'box_plot_' + name + '.png')
|
||||
path_fig = os.path.join(dir_fig, 'box_plot_' + name + '.png')
|
||||
plt.tight_layout()
|
||||
plt.savefig(path_fig, dpi=DPI)
|
||||
print("Figure of box plot saved in {}".format(path_fig))
|
||||
@ -300,8 +301,8 @@ def get_percentile(dist_gmm):
|
||||
# mad_d = np.mean(np.abs(dist_d - mu_d))
|
||||
|
||||
|
||||
def printing_styles(stereo):
|
||||
if stereo:
|
||||
def printing_styles(net):
|
||||
if net == 'monstereo':
|
||||
style = {"labels": ['3DOP', 'PSF', 'MonoLoco', 'MonoPSR', 'Pseudo-Lidar', 'Our MonStereo'],
|
||||
"methods": ['3dop', 'psf', 'monoloco', 'monopsr', 'pseudo-lidar', 'monstereo'],
|
||||
"mks": ['s', 'p', 'o', 'v', '*', '^'],
|
||||
@ -309,11 +310,12 @@ def printing_styles(stereo):
|
||||
"colors": ['gold', 'skyblue', 'darkgreen', 'pink', 'darkorange', 'b'],
|
||||
"lstyles": ['solid', 'solid', 'dashed', 'dashed', 'solid', 'solid']}
|
||||
else:
|
||||
style = {"labels": ['Mono3D', 'Geometric Baseline', 'MonoPSR', '3DOP (stereo)', 'MonoLoco', 'Monoloco++'],
|
||||
"methods": ['m3d', 'geometric', 'monopsr', '3dop', 'monoloco', 'monoloco_pp'],
|
||||
style = {"labels": ['Geometric Baseline', 'MonoPSR', 'MonoDIS', '3DOP (stereo)',
|
||||
'MonoLoco', 'Monoloco++'],
|
||||
"methods": ['geometric', 'monopsr', 'monodis', '3dop', 'monoloco', 'monoloco_pp'],
|
||||
"mks": ['*', '^', 'p', '.', 's', 'o', 'o'],
|
||||
"mksizes": [6, 6, 6, 6, 6, 6], "lws": [1.5, 1.5, 1.5, 1.5, 1.5, 2.2],
|
||||
"colors": ['r', 'purple', 'olive', 'darkorange', 'b', 'darkblue'],
|
||||
"colors": ['purple', 'olive', 'r', 'darkorange', 'b', 'darkblue'],
|
||||
"lstyles": ['solid', 'solid', 'solid', 'dashdot', 'solid', 'solid', ]}
|
||||
|
||||
return style
|
||||
|
||||
@ -1,3 +1,6 @@
|
||||
|
||||
# File adapted from https://github.com/vita-epfl/openpifpaf
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
import numpy as np
|
||||
@ -39,21 +42,20 @@ def canvas(fig_file=None, show=True, **kwargs):
|
||||
@contextmanager
|
||||
def image_canvas(image, fig_file=None, show=True, dpi_factor=1.0, fig_width=10.0, **kwargs):
|
||||
if 'figsize' not in kwargs:
|
||||
kwargs['figsize'] = (fig_width, fig_width * image.shape[0] / image.shape[1])
|
||||
kwargs['figsize'] = (fig_width, fig_width * image.size[1] / image.size[0])
|
||||
|
||||
fig = plt.figure(**kwargs)
|
||||
ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
|
||||
ax.set_axis_off()
|
||||
ax.set_xlim(0, image.shape[1])
|
||||
ax.set_ylim(image.shape[0], 0)
|
||||
ax.set_xlim(0, image.size[0])
|
||||
ax.set_ylim(image.size[1], 0)
|
||||
fig.add_axes(ax)
|
||||
image_2 = ndimage.gaussian_filter(image, sigma=2.5)
|
||||
ax.imshow(image_2, alpha=0.4)
|
||||
|
||||
yield ax
|
||||
|
||||
if fig_file:
|
||||
fig.savefig(fig_file, dpi=image.shape[1] / kwargs['figsize'][0] * dpi_factor)
|
||||
fig.savefig(fig_file, dpi=image.size[0] / kwargs['figsize'][0] * dpi_factor)
|
||||
print('keypoints image saved')
|
||||
if show:
|
||||
plt.show()
|
||||
|
||||
@ -28,7 +28,7 @@ def image_attributes(dpi, output_types):
fontsize_num=round(22 * c),
fontsize_ax=round(16 * c),
linewidth=round(8 * c),
markersize=round(16 * c),
markersize=round(13 * c),
y_box_margin=round(24 * math.sqrt(c)),
stereo=dict(color='deepskyblue',
numcolor='darkorange',
@ -58,7 +58,7 @@ class Printer:
self.output_path = output_path
self.kk = kk
self.output_types = args.output_types
self.z_max = args.z_max # To include ellipses in the image
self.z_max = args.z_max # set max distance to show instances
self.show_all = args.show_all
self.show = args.show_all
self.save = not args.no_save
@ -74,26 +74,41 @@ class Printer:
self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']]
self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']]

# Set maximum distance
self.dd_pred = dic_ann['dds_pred']
self.dd_real = dic_ann['dds_real']
self.z_max = int(min(self.z_max, 4 + max(max(self.dd_pred), max(self.dd_real, default=0))))

# Do not print instances outside z_max
self.zz_gt = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0
for idx, xx in enumerate(dic_ann['xyz_real'])]
self.zz_pred = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0
for idx, xx in enumerate(dic_ann['xyz_pred'])]
self.dd_pred = dic_ann['dds_pred']
self.dd_real = dic_ann['dds_real']

self.uv_heads = dic_ann['uv_heads']
self.uv_shoulders = dic_ann['uv_shoulders']
self.boxes = dic_ann['boxes']
self.boxes_gt = dic_ann['boxes_gt']
self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1])
if dic_ann['aux']:
self.auxs = dic_ann['aux'] if dic_ann['aux'] else None
self.auxs = dic_ann['aux']
if len(self.auxs) == 0:
self.modes = ['mono'] * len(self.dd_pred)
else:
self.modes = []
for aux in self.auxs:
if aux <= 0.3:
self.modes.append('mono')
else:
self.modes.append('stereo')

def factory_axes(self):
def factory_axes(self, dic_out):
"""Create axes for figures: front bird multi"""
axes = []
figures = []

# Process the annotation dictionary of monoloco
self._process_results(dic_out)

# Initialize multi figure, resizing it for aesthetic proportion
if 'multi' in self.output_types:
assert 'bird' and 'front' not in self.output_types, \
@ -150,10 +165,7 @@ class Printer:
axes.append(ax1)
return figures, axes

def draw(self, figures, axes, dic_out, image):

# Process the annotation dictionary of monoloco
self._process_results(dic_out)
def draw(self, figures, axes, image):

# whether to include instances that don't match the ground-truth
iterator = range(len(self.zz_pred)) if self.show_all else range(len(self.zz_gt))
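Taken together, these hunks move _process_results from draw into factory_axes, so the annotation dictionary is consumed once when the axes are created and the per-instance modes list is ready before drawing. A minimal sketch of the updated calling sequence follows; it is not part of the diff, and the Printer constructor arguments (image, output path, camera intrinsics kk, parsed args) are assumed from the surrounding context.

# Sketch only: constructor signature and variable names (kk, args, dic_out) are assumptions.
from PIL import Image

pil_image = Image.open('docs/frame_0000.png')              # hypothetical input image
printer = Printer(pil_image, 'out/frame_0000', kk, args)   # assumed constructor signature
figures, axes = printer.factory_axes(dic_out)               # processes dic_out and builds the figures
printer.draw(figures, axes, pil_image)                      # draws boxes and numbers; dic_out is no longer passed here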
@ -163,9 +175,9 @@ class Printer:

# Draw the front figure
number = dict(flag=False, num=97)
if 'multi' in self.output_types:
if any(xx in self.output_types for xx in ['front', 'multi']):
number['flag'] = True # add numbers
self.mpl_im0.set_data(image)
self.mpl_im0.set_data(image)
for idx in iterator:
if any(xx in self.output_types for xx in ['front', 'multi']) and self.zz_pred[idx] > 0:
self._draw_front(axes[0],
@ -199,8 +211,6 @@ class Printer:

def _draw_front(self, ax, z, idx, number):

mode = 'stereo' if self.auxs[idx] > 0.3 else 'mono'

# Bbox
w = min(self.width-2, self.boxes[idx][2] - self.boxes[idx][0])
h = min(self.height-2, (self.boxes[idx][3] - self.boxes[idx][1]) * self.y_scale)
@ -211,12 +221,12 @@ class Printer:
width=w,
height=h,
fill=False,
color=self.attr[mode]['color'],
linewidth=self.attr[mode]['linewidth'])
color=self.attr[self.modes[idx]]['color'],
linewidth=self.attr[self.modes[idx]]['linewidth'])
ax.add_patch(rectangle)
z_str = str(z).split(sep='.')
text = z_str[0] + '.' + z_str[1][0]
bbox_config = {'facecolor': self.attr[mode]['color'], 'alpha': 0.4, 'linewidth': 0}
bbox_config = {'facecolor': self.attr[self.modes[idx]]['color'], 'alpha': 0.4, 'linewidth': 0}

x_t = x0 - 1.5
y_t = y1 + self.attr['y_box_margin']
@ -236,12 +246,12 @@ class Printer:
y1 + 14,
chr(number['num']),
fontsize=self.attr['fontsize_num'],
color=self.attr[mode]['numcolor'],
color=self.attr[self.modes[idx]]['numcolor'],
weight='bold')

def _draw_text_bird(self, axes, idx, num):
"""Plot the number in the bird eye view map"""
mode = 'stereo' if self.auxs[idx] > 0.3 else 'mono'

std = self.stds_epi[idx] if self.stds_epi[idx] > 0 else self.stds_ale[idx]
theta = math.atan2(self.zz_pred[idx], self.xx_pred[idx])

@ -250,7 +260,7 @@ class Printer:

axes[1].text(self.xx_pred[idx] + delta_x + 0.2, self.zz_pred[idx] + delta_z + 0/2, chr(num),
fontsize=self.attr['fontsize_bv'],
color=self.attr[mode]['numcolor'])
color=self.attr[self.modes[idx]]['numcolor'])

def _draw_uncertainty(self, axes, idx):
||||
7
setup.py
@ -18,7 +18,8 @@ setup(
'monstereo.utils'
],
license='GNU AGPLv3',
description='MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization',
description=' Perceiving Humans: from Monocular 3D Localization to Social Distancing '
'/ MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization',
long_description=open('README.md').read(),
long_description_content_type='text/markdown',
author='Lorenzo Bertoni',
@ -27,9 +28,7 @@ setup(
zip_safe=False,

install_requires=[
'openpifpaf==0.8.0',
'torch==1.1.0',
'torchvision==0.3.0'
'openpifpaf>=0.11'
],
extras_require={
'eval': [
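With the exact pins on torch and torchvision removed, install_requires only declares openpifpaf>=0.11, and the deep-learning backend is expected to come in through openpifpaf's own dependencies. A quick post-install check, offered as a sketch rather than part of the diff:

# Sketch only: verifies which versions pip actually resolved.
import openpifpaf
import torch

print('openpifpaf', openpifpaf.__version__)   # should be 0.11 or newer
print('torch', torch.__version__)             # version now chosen by openpifpaf's requirements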