Merge pull request #3 from vita-epfl/update
OpenPifPaf 0.12 and cleaning
@@ -9,7 +9,7 @@ Good-names=xx,dd,zz,hh,ww,pp,kk,lr,w1,w2,w3,mm,im,uv,ax,COV_MIN,CONF_MIN

 [TYPECHECK]

-disable=import-error,invalid-name,unused-variable,fixme,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation
+disable=import-error,invalid-name,unused-variable,E1102,missing-docstring,useless-object-inheritance,duplicate-code,too-many-arguments,too-many-instance-attributes,too-many-locals,too-few-public-methods,arguments-differ,logging-format-interpolation,import-outside-toplevel

 # List of members which are set dynamically and missed by pylint inference
LICENSE
@@ -1,4 +1,4 @@
-Copyright 2020 by EPFL/VITA. All rights reserved.
+Copyright 2020-2021 by EPFL/VITA. All rights reserved.

 This project and all its files are licensed under
 GNU AGPLv3 or later version.

@@ -6,4 +6,4 @@ GNU AGPLv3 or later version.
 If this license is not suitable for your business or project
 please contact EPFL-TTO (https://tto.epfl.ch/) for a full commercial license.

-This software may not be used to harm any person deliberately.
+This software may not be used to harm any person deliberately or for any military application.
README.md
@@ -1,19 +1,61 @@
 # Perceiving Humans in 3D

-This repository contains the code for three research projects:
-
-1. **MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization**
-   [README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonStereo.md) & [Article](https://arxiv.org/abs/2008.10913)
-
-   
-
-2. **Perceiving Humans: from Monocular 3D Localization to Social Distancing**
-   [README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonoLoco%2B%2B.md) & [Article](https://arxiv.org/abs/2009.00984)
-
-   
-
-3. **MonoLoco: Monocular 3D Pedestrian Localization and Uncertainty Estimation** (Improved!)
-   [README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonoLoco.md) & [Article](https://arxiv.org/abs/1906.06059) & [Original Repo](https://github.com/vita-epfl/monoloco)
-
-   
+This repository contains the code for two research projects:
+
+1. **Perceiving Humans: from Monocular 3D Localization to Social Distancing (MonoLoco++)**
+
+   [README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonoLoco%2B%2B.md) & [Article](https://arxiv.org/abs/2009.00984)
+
+   
+
+2. **MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization**
+
+   [README](https://github.com/vita-epfl/monstereo/blob/update/docs/MonStereo.md) & [Article](https://arxiv.org/abs/2008.10913)
+
+   
+
+Both projects have been built upon the CVPR'19 project [Openpifpaf](https://github.com/vita-epfl/openpifpaf)
+for 2D pose estimation and the ICCV'19 project [MonoLoco](https://github.com/vita-epfl/monoloco) for monocular 3D localization.
+All projects share the AGPL Licence.
+
+# Setup
+
+Installation steps are the same for both projects.
+
+### Install
+The installation has been tested on OSX and Linux operating systems, with Python 3.6 or Python 3.7.
+Packages have been installed with pip and virtual environments.
+For quick installation, do not clone this repository,
+and make sure there is no folder named monstereo in your current directory.
+A GPU is not required, yet highly recommended for real-time performance.
+MonStereo can be installed as a package, by:
+
+```
+pip3 install monstereo
+```
+
+For development of the monstereo source code itself, you need to clone this repository and then:
+```
+pip3 install sdist
+cd monstereo
+python3 setup.py sdist bdist_wheel
+pip3 install -e .
+```
+
+### Interfaces
+All the commands are run through a main file called `run.py` using subparsers.
+To check all the commands for the parser and the subparsers (including openpifpaf ones) run:
+
+* `python3 -m monstereo.run --help`
+* `python3 -m monstereo.run predict --help`
+* `python3 -m monstereo.run train --help`
+* `python3 -m monstereo.run eval --help`
+* `python3 -m monstereo.run prep --help`
+
+or check the file `monstereo/run.py`
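The subparser layout referred to above can be pictured with a minimal, self-contained sketch; it is illustrative only and not the repository's actual `run.py` (the sub-command names come from the list above, the arguments shown are assumptions):

```
# Illustrative argparse sub-command layout; not the actual monstereo code.
import argparse

def cli():
    parser = argparse.ArgumentParser(prog='monstereo.run')
    subparsers = parser.add_subparsers(dest='command')

    predict = subparsers.add_parser('predict', help='2D pose detection + 3D localization')
    predict.add_argument('--model', help='path to a trained model checkpoint')

    train = subparsers.add_parser('train', help='train from preprocessed joints')
    train.add_argument('--joints', help='json file of preprocessed joints')

    return parser.parse_args()

if __name__ == '__main__':
    args = cli()
    print('selected sub-command:', args.command)
```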
+
+Further instructions for prediction, preprocessing, training and evaluation can be found here:
+
+* [MonStereo README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonStereo.md)
+* [MonoLoco++ README](https://github.com/vita-epfl/monstereo/tree/master/docs/MonoLoco_pp.md)
BIN docs/000840_multi.jpg (new file, 633 KiB)
BIN docs/002282.png (new executable file, 831 KiB)
@@ -24,53 +24,15 @@ month = {August},
 year = {2020}
 }
 ```

+# Prediction
+The predict script receives an image (or an entire folder using glob expressions),
+calls PifPaf for 2d human pose detection over the image
+and runs MonStereo for 3d location of the detected poses.
+
-# Features
-The code has been built upon the ICCV'19 project [MonoLoco](https://github.com/vita-epfl/monoloco).
-This repository supports
-
-* the original MonoLoco
-* An improved Monocular version (MonoLoco++) for x,y,z coordinates, orientation, and dimensions
-* MonStereo
-
-# Setup
-
-### Install
-The installation has been tested on OSX and Linux operating systems, with Python 3.6 or Python 3.7.
-Packages have been installed with pip and virtual environments.
-For quick installation, do not clone this repository,
-and make sure there is no folder named monstereo in your current directory.
-A GPU is not required, yet highly recommended for real-time performances.
-MonStereo can be installed as a package, by:
-
-```
-pip3 install monstereo
-```
-
-For development of the monstereo source code itself, you need to clone this repository and then:
-```
-pip3 install sdist
-cd monstereo
-python3 setup.py sdist bdist_wheel
-pip3 install -e .
-```
-
-### Data structure
-
-Data
-├── arrays
-├── models
-├── kitti
-├── logs
-├── output
-
-Run the following to create the folders:
-```
-mkdir data
-cd data
-mkdir arrays models kitti logs output
-```
+Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
+*birds-eye-view mode* or *multi mode* and can be specified with `--output_types`

 ### Pre-trained Models
 * Download Monstereo pre-trained model from
@@ -85,27 +47,6 @@ Alternatively, you can download a Pifpaf pre-trained model from [openpifpaf](htt
 If you'd like to use an updated version, we suggest to re-train the MonStereo model as well.
 * The model for the experiments is provided in *data/models/ms-200710-1511.pkl*

-# Interfaces
-All the commands are run through a main file called `main.py` using subparsers.
-To check all the commands for the parser and the subparsers (including openpifpaf ones) run:
-
-* `python3 -m monstereo.run --help`
-* `python3 -m monstereo.run predict --help`
-* `python3 -m monstereo.run train --help`
-* `python3 -m monstereo.run eval --help`
-* `python3 -m monstereo.run prep --help`
-
-or check the file `monstereo/run.py`
-
-# Prediction
-The predict script receives an image (or an entire folder using glob expressions),
-calls PifPaf for 2d human pose detection over the image
-and runs MonStereo for 3d location of the detected poses.
-
-Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
-*birds-eye-view mode* or *multi mode* and can be specified with `--output_types`

 ### Ground truth matching
 * In case you provide a ground-truth json file to compare the predictions of MonSter,
@@ -125,13 +66,13 @@ After downloading model and ground-truth file, a demo can be tested with the fol
 --model data/models/ms-200710-1511.pkl --z_max 30 --checkpoint resnet152 --path_gt data/arrays/names-kitti-200615-1022.json
 -o data/output`

-
+

 `python3 -m monstereo.run predict --glob docs/005523*.png --output_types multi --scale 2
 --model data/models/ms-200710-1511.pkl --z_max 30 --checkpoint resnet152 --path_gt data/arrays/names-kitti-200615-1022.json
 -o data/output`

-
+

 # Preprocessing
 Preprocessing and training steps are already fully supported by the code provided,
@@ -139,6 +80,22 @@ but require first to run a pose detector over
 all the training images and collect the annotations.
 The code supports this option (by running the predict script and using `--mode pifpaf`).

+### Data structure
+
+Data
+├── arrays
+├── models
+├── kitti
+├── logs
+├── output
+
+Run the following to create the folders:
+```
+mkdir data
+cd data
+mkdir arrays models kitti logs output
+```

 ### Datasets
 Download KITTI ground truth files and camera calibration matrices for training
@@ -149,13 +106,20 @@ data/kitti/images`

 ### Annotations to preprocess
-MonStereo is trained using 2D human pose joints. To create them run pifaf over KITTI training images.
-You can create them running the predict script and using `--mode pifpaf`.
+MonStereo is trained using 2D human pose joints. To obtain the joints the first step is to run
+pifpaf over KITTI training images, by either running the predict script and using `--mode pifpaf`,
+or by using pifpaf code directly.
+MonStereo preprocess script expects annotations from left and right images in 2 different folders
+with the same path apart from the suffix `_right` for the "right" folder.
+For example `data/annotations` and `data/annotations_right`.
+Do not change the name of the json files created by pifpaf. For each left annotation,
+the code will look for the corresponding right annotation.
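The left/right pairing described above can be expressed as a tiny helper; this is an illustrative sketch, not repository code, and the annotation file name in the comment is an assumption:

```
# Illustrative helper (not part of monstereo): derive the right-image annotation
# path from the left one by adding the `_right` suffix to the folder name.
import os

def right_annotation_path(left_path):
    left_dir, file_name = os.path.split(left_path)
    return os.path.join(left_dir + '_right', file_name)

# e.g. data/annotations/000001.png.predictions.json
#  ->  data/annotations_right/000001.png.predictions.json   (file name assumed)
```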
 ### Inputs joints for training
 MonStereo is trained using 2D human pose joints matched with the ground truth location provided by
 KITTI Dataset. To create the joints run: `python3 -m monstereo.run prep` specifying:
-1. `--dir_ann` annotation directory containing Pifpaf joints of KITTI.
+`--dir_ann` annotation directory containing Pifpaf joints of KITTI for the left images.

 ### Ground truth file for evaluation
@@ -165,7 +129,7 @@ by the image name to easily access ground truth files for evaluation and predict

 # Training
 Provide the json file containing the preprocessed joints as argument.
 As simple as `python3 -m monstereo.run train --joints <json file path>`
 All the hyperparameter options can be checked at `python3 -m monstereo.run train --help`.

 # Evaluation (KITTI Dataset)
docs/MonoLoco++.md (new file)
@@ -0,0 +1,216 @@

# Perceiving Humans: from Monocular 3D Localization to Social Distancing

> Perceiving humans in the context of Intelligent Transportation Systems (ITS)
often relies on multiple cameras or expensive LiDAR sensors.
In this work, we present a new cost-effective vision-based method that perceives humans’ locations in 3D
and their body orientation from a single image.
We address the challenges related to the ill-posed monocular 3D tasks by proposing a deep learning method
that predicts confidence intervals in contrast to point estimates. Our neural network architecture estimates
humans 3D body locations and their orientation with a measure of uncertainty.
Our vision-based system (i) is privacy-safe, (ii) works with any fixed or moving cameras,
and (iii) does not rely on ground plane estimation.
We demonstrate the performance of our method with respect to three applications:
locating humans in 3D, detecting social interactions,
and verifying the compliance of recent safety measures due to the COVID-19 outbreak.
Indeed, we show that we can rethink the concept of “social distancing” as a form of social interaction
in contrast to a simple location-based rule. We publicly share the source code towards an open science mission.

```
@InProceedings{bertoni_social,
author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
title={Perceiving Humans: from Monocular 3D Localization to Social Distancing},
booktitle = {arXiv:2009.00984},
month = {September},
year = {2020}
}
```


## Predictions
For a quick setup, download a pifpaf and a MonoLoco++ model from
[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
and save them into `data/models`.

### 3D Localization
The predict script receives an image (or an entire folder using glob expressions),
calls PifPaf for 2d human pose detection over the image
and runs Monoloco++ for 3d location of the detected poses.
The `--net` command defines whether to save pifpaf outputs, MonoLoco++ outputs or MonStereo ones.
You can check all commands for Pifpaf at [openpifpaf](https://github.com/vita-epfl/openpifpaf).

Output options include json files and/or visualization of the predictions on the image in *frontal mode*,
*birds-eye-view mode* or *combined mode* and can be specified with `--output_types`

Ground-truth KITTI files for comparing results can be downloaded from
[here](https://drive.google.com/drive/folders/1jZToVMBEZQMdLB5BAIq2CdCLP5kzNo9t?usp=sharing)
(file called *names-kitti*) and should be saved into `data/arrays`.
Ground-truth files can also be generated; more info in the preprocessing section.

For an example image, run the following command:

```
python -m monstereo.run predict \
docs/002282.png \
--net monoloco_pp \
--output_types multi \
--model data/models/monoloco_pp-201203-1424.pkl \
--path_gt data/arrays/names-kitti-200615-1022.json \
-o <output directory> \
--long-edge <rescale the image by providing dimension of long side. If None original resolution> \
--n_dropout <50 to include epistemic uncertainty, 0 otherwise>
```
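As a rough illustration of what `--n_dropout` hints at, the sketch below shows the common MC-dropout recipe for epistemic uncertainty; this is an assumption about the mechanism, not the monstereo implementation:

```
# Illustrative MC-dropout sketch (assumed mechanism, not monstereo code):
# keep dropout active at inference, run several stochastic forward passes,
# and read the spread of the outputs as an epistemic-uncertainty estimate.
import torch

def mc_dropout_forward(model, x, n_samples=50):
    model.train()                  # keeps dropout layers stochastic
    with torch.no_grad():
        samples = torch.stack([model(x) for _ in range(n_samples)])
    model.eval()
    return samples.mean(dim=0), samples.std(dim=0)
```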

![predict](out_002282.png.multi.jpg)

To show all the instances estimated by MonoLoco add the argument `--show_all` to the above command.

![predict_all](out_002282.png.multi_all.jpg)

### Social Distancing
To visualize social distancing compliance, simply add the argument `--social-distance` to the predict command.

An example from the Collective Activity Dataset is provided below.

<img src="frame0038.jpg" width="500"/>

To visualize social distancing, run the command below:
```
python -m monstereo.run predict \
docs/frame0038.jpg \
--net monoloco_pp \
--social_distance \
--output_types front bird --show_all \
--model data/models/monoloco_pp-201203-1424.pkl -o <output directory>
```
<img src="out_frame0038.jpg.front.png" width="400"/>

<img src="out_frame0038.jpg.bird.png" width="400"/>

Threshold distance and radii (for F-formations) can be set using `--threshold-dist` and `--radii`, respectively.

For more info, run:

`python -m monstereo.run predict --help`

### Orientation and Bounding Box dimensions
MonoLoco++ estimates orientation and box dimensions as well. Results are saved in a json file when using the command
`--output_types json`. At the moment, the only visualization including orientation is the social distancing one.

## Preprocessing

### Kitti
Annotations from a pose detector need to be stored in a folder.
For example, by using [openpifpaf](https://github.com/vita-epfl/openpifpaf):
```
python -m openpifpaf.predict \
--glob "<kitti images directory>/*.png" \
--json-output <directory to contain predictions> \
--checkpoint=shufflenetv2k30 \
--instance-threshold=0.05 --seed-threshold 0.05 --force-complete-pose
```
Once the step is complete:
`python -m monstereo.run prep --dir_ann <directory that contains predictions> --monocular`


### Collective Activity Dataset
To evaluate on the [collective activity dataset](http://vhosts.eecs.umich.edu/vision//activity-dataset.html)
(without any training) we selected 6 scenes that contain people talking to each other.
This allows for a balanced dataset, but any other configuration will work.

The expected structure for the dataset is the following:

collective_activity
├── images
├── annotations

where the images and annotations inside have the following naming convention:

IMAGES: seq<sequence_name>_frame<frame_name>.jpg
ANNOTATIONS: seq<sequence_name>_annotations.txt

With respect to the original dataset, the images and annotations are moved to a single folder
and the sequence is added to their name. One command to do this is:

`rename -v -n 's/frame/seq14_frame/' f*.jpg`

which, for example, changes the name of all the jpg images in that folder, adding the sequence number
(remove `-n` after checking it works).
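For systems without the `rename` utility, a hypothetical Python equivalent of the same renaming step (sequence id and file pattern are illustrative):

```
# Illustrative alternative to the `rename` command above (not part of monstereo):
# prefix every frame image of one sequence with its sequence id.
import glob
import os

SEQ = 'seq14'                           # adjust per sequence folder
for path in glob.glob('frame*.jpg'):    # run inside the sequence folder
    folder, name = os.path.split(path)
    os.rename(path, os.path.join(folder, SEQ + '_' + name))
```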

Pifpaf annotations should also be saved in a single folder and can be created with:

```
python -m openpifpaf.predict \
--glob "data/collective_activity/images/*.jpg" \
--checkpoint=shufflenetv2k30 \
--instance-threshold=0.05 --seed-threshold 0.05 --force-complete-pose \
--json-output /data/lorenzo-data/annotations/collective_activity/v012
```

Finally, to evaluate activity using a MonoLoco++ pre-trained model trained either on nuScenes or KITTI:
```
python -m monstereo.run eval --activity \
--net monoloco_pp --dataset collective \
--model <MonoLoco++ model path> --dir_ann <pifpaf annotations directory>
```

## Training
We train on the KITTI or nuScenes dataset, specifying the path of the input joints.

Our results are obtained with:

`python -m monstereo.run train --lr 0.001 --joints data/arrays/joints-kitti-201202-1743.json --save --monocular`

For a more extensive list of available parameters, run:

`python -m monstereo.run train --help`

## Evaluation

### 3D Localization
We provide evaluation on KITTI for models trained on nuScenes or KITTI. We compare them with other monocular
and stereo baselines:

[MonoLoco](https://github.com/vita-epfl/monoloco),
[Mono3D](https://www.cs.toronto.edu/~urtasun/publications/chen_etal_cvpr16.pdf),
[3DOP](https://xiaozhichen.github.io/papers/nips15chen.pdf),
[MonoDepth](https://arxiv.org/abs/1609.03677),
[MonoPSR](https://github.com/kujason/monopsr),
[MonoDIS](https://research.mapillary.com/img/publications/MonoDIS.pdf) and our
[Geometrical Baseline](monoloco/eval/geom_baseline.py).

* **Mono3D**: download validation files from [here](http://3dimage.ee.tsinghua.edu.cn/cxz/mono3d)
and save them into `data/kitti/m3d`
* **3DOP**: download validation files from [here](https://xiaozhichen.github.io/)
and save them into `data/kitti/3dop`
* **MonoDepth**: compute an average depth for every instance using the following script
[here](https://github.com/Parrotlife/pedestrianDepth-baseline/tree/master/MonoDepth-PyTorch)
and save them into `data/kitti/monodepth`
* **GeometricalBaseline**: A geometrical baseline comparison is provided.

Download the model for monoloco

The average geometrical value for comparison can be obtained by running:
```
python -m monstereo.run eval \
--dir_ann <annotation directory> \
--model <model path> \
--net monoloco_pp \
--generate
```

To include also the geometric baselines and MonoLoco, add the flag `--baselines`

### Activity Estimation (Talking)
Please follow the preprocessing steps for the Collective Activity dataset and run pifpaf over the dataset images.
Evaluation on this dataset is done with models trained on either KITTI or nuScenes.
For optimal performance, we suggest the model trained on the nuScenes teaser (TODO add link)
```
python -m monstereo.run eval \
--activity \
--dataset collective \
--net monoloco_pp \
--model <path to the model> \
--dir_ann <annotation directory>
```
@@ -1,14 +0,0 @@
-
-### Work in Progress
-
-For the moment please refer to the [original repository](https://github.com/vita-epfl/monoloco)
-
-```
-@InProceedings{bertoni_perceiving,
-author = {Bertoni, Lorenzo and Kreiss, Sven and Alahi, Alexandre},
-title = {Perceiving Humans: from Monocular 3D Localization to Social Distancing},
-booktitle = {arXiv:2009.00984},
-month = {September},
-year = {2020}
-}
-```
BIN docs/frame0038.jpg (new file, 42 KiB)
BIN docs/out_000840.jpg (new file, 197 KiB)
BIN docs/out_000840.png (removed, was 584 KiB)
BIN docs/out_002282.png.multi.jpg (new file, 398 KiB)
BIN docs/out_002282.png.multi_all.jpg (new file, 344 KiB)
BIN docs/out_005523.jpg (new file, 255 KiB)
BIN docs/out_005523.png (removed, was 761 KiB)
BIN docs/out_frame0038.jpg.bird.png (new file, 27 KiB)
BIN docs/out_frame0038.jpg.front.png (new file, 184 KiB)
BIN docs/pull_sd.png (removed, was 2.2 MiB)
BIN docs/social_distancing.jpg (new file, 289 KiB)
BIN docs/truck.jpg (new file, 149 KiB)
BIN docs/truck.png (removed, was 460 KiB)
@@ -2,24 +2,16 @@
 # pylint: disable=too-many-statements

 import math
-import glob
-import os
 import copy
 from contextlib import contextmanager

 import numpy as np
 import torch
-import torch.nn.functional as F
-import torchvision
 import matplotlib.pyplot as plt
 from matplotlib.patches import Circle, FancyArrow
-from PIL import Image

 from .network.process import laplace_sampling
-from .utils import open_annotations
 from .visuals.pifpaf_show import KeypointPainter, image_canvas
-from .network import Loco
-from .network.process import factory_for_gt, preprocess_pifpaf


 def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,
@@ -27,17 +19,20 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa
     """
     return flag of alert if social distancing is violated
     """

+    # A) Check whether people are close together
     xx = centers[idx][0]
     zz = centers[idx][1]
     distances = [math.sqrt((xx - centers[i][0]) ** 2 + (zz - centers[i][1]) ** 2) for i, _ in enumerate(centers)]
     sorted_idxs = np.argsort(distances)
     indices = [idx_t for idx_t in sorted_idxs[1:] if distances[idx_t] <= threshold_dist]

+    # B) Check whether people are looking inwards and whether there are no intrusions
     # Deterministic
     if n_samples < 2:
         for idx_t in indices:
             if check_f_formations(idx, idx_t, centers, angles,
                                   radii=radii,  # Binary value
                                   social_distance=social_distance):
                 return True

@@ -72,8 +67,8 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa

 def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False):
     """
-    Check F-formations for people close together:
-    1) Empty space of 0.4 + meters (no other people or themselves inside)
+    Check F-formations for people close together (this function does not expect far away people):
+    1) Empty space of a certain radius (no other people or themselves inside)
     2) People looking inward
     """

@@ -91,119 +86,25 @@ def check_f_formations(idx, idx_t, centers, angles, radii, social_distance=False
     mu_1 = np.array([centers[idx_t][0] + radius * math.cos(theta1), centers[idx_t][1] - radius * math.sin(theta1)])
     o_c = (mu_0 + mu_1) / 2

-    # Verify they are looking inwards.
+    # 1) Verify they are looking inwards.
     # The distance between mus and the center should be less wrt the original position and the center
     d_new = np.linalg.norm(mu_0 - mu_1) / 2 if social_distance else np.linalg.norm(mu_0 - mu_1)
     d_0 = np.linalg.norm(x_0 - o_c)
     d_1 = np.linalg.norm(x_1 - o_c)

-    # Verify no intrusion for third parties
+    # 2) Verify no intrusion for third parties
     if other_centers.size:
         other_distances = np.linalg.norm(other_centers - o_c.reshape(1, -1), axis=1)
     else:
         other_distances = 100 * np.ones((1, 1))  # Condition verified if no other people

     # Binary Classification
+    # if np.min(other_distances) > radius:  # Ablation without orientation
     if d_new <= min(d_0, d_1) and np.min(other_distances) > radius:
         return True
     return False

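The o-space test above can be pictured with a small, self-contained sketch using toy values; it follows the sign convention of the `mu` computation shown in this hunk but is not the repository API:

```
# Toy illustration of the F-formation (o-space) "looking inwards" test; not monstereo code.
import math
import numpy as np

p0, p1 = np.array([0.0, 2.0]), np.array([1.2, 2.0])   # x-z positions of two people
a0, a1 = 0.0, math.pi                                  # orientations (radians): facing each other
radius = 0.3                                           # assumed o-space radius

mu0 = p0 + radius * np.array([math.cos(a0), -math.sin(a0)])
mu1 = p1 + radius * np.array([math.cos(a1), -math.sin(a1)])
o_c = (mu0 + mu1) / 2                                  # candidate o-space centre

# Looking inwards: the projected points end up closer to each other than each
# person is to the shared centre, so the pair forms an o-space.
looking_inwards = np.linalg.norm(mu0 - mu1) <= min(np.linalg.norm(p0 - o_c),
                                                   np.linalg.norm(p1 - o_c))
print(looking_inwards)   # True for this toy configuration
```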
-def predict(args):
-
-    cnt = 0
-    args.device = torch.device('cpu')
-    if torch.cuda.is_available():
-        args.device = torch.device('cuda')
-
-    # Load data and model
-    monoloco = Loco(model=args.model, net='monoloco_pp',
-                    device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    images = []
-    images += glob.glob(args.glob)  # from cli as a string or linux converts
-
-    # Option 1: Run PifPaf extract poses and run MonoLoco in a single forward pass
-    if args.json_dir is None:
-        from .network import PifPaf, ImageList
-        pifpaf = PifPaf(args)
-        data = ImageList(args.images, scale=args.scale)
-        data_loader = torch.utils.data.DataLoader(
-            data, batch_size=1, shuffle=False,
-            pin_memory=args.pin_memory, num_workers=args.loader_workers)
-
-        for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
-            images = image_tensors.permute(0, 2, 3, 1)
-
-            processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-            fields_batch = pifpaf.fields(processed_images)
-
-            # unbatch
-            for image_path, image, processed_image_cpu, fields in zip(
-                    image_paths, images, processed_images_cpu, fields_batch):
-
-                if args.output_directory is None:
-                    output_path = image_path
-                else:
-                    file_name = os.path.basename(image_path)
-                    output_path = os.path.join(args.output_directory, file_name)
-                im_size = (float(image.size()[1] / args.scale),
-                           float(image.size()[0] / args.scale))
-
-                print('image', idx, image_path, output_path)
-
-                _, _, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
-
-                kk, dic_gt = factory_for_gt(im_size, name=image_path, path_gt=args.path_gt)
-                image_t = image  # Resized tensor
-
-                # Run Monoloco
-                boxes, keypoints = preprocess_pifpaf(pifpaf_out, im_size, enlarge_boxes=False)
-                dic_out = monoloco.forward(keypoints, kk)
-                dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
-
-                # Print
-                show_social(args, image_t, output_path, pifpaf_out, dic_out)
-
-                print('Image {}\n'.format(cnt) + '-' * 120)
-                cnt += 1
-
-    # Option 2: Load json file of poses from PifPaf and run monoloco
-    else:
-        for idx, im_path in enumerate(images):
-
-            # Load image
-            with open(im_path, 'rb') as f:
-                image = Image.open(f).convert('RGB')
-            if args.output_directory is None:
-                output_path = im_path
-            else:
-                file_name = os.path.basename(im_path)
-                output_path = os.path.join(args.output_directory, file_name)
-
-            im_size = (float(image.size[0] / args.scale),
-                       float(image.size[1] / args.scale))  # Width, Height (original)
-            kk, dic_gt = factory_for_gt(im_size, name=im_path, path_gt=args.path_gt)
-            image_t = torchvision.transforms.functional.to_tensor(image).permute(1, 2, 0)
-
-            # Load json
-            basename, ext = os.path.splitext(os.path.basename(im_path))
-
-            extension = ext + '.pifpaf.json'
-            path_json = os.path.join(args.json_dir, basename + extension)
-            annotations = open_annotations(path_json)
-
-            # Run Monoloco
-            boxes, keypoints = preprocess_pifpaf(annotations, im_size, enlarge_boxes=False)
-            dic_out = monoloco.forward(keypoints, kk)
-            dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=False)
-            # Print
-            show_social(args, image_t, output_path, annotations, dic_out)
-
-            print('Image {}\n'.format(cnt) + '-' * 120)
-            cnt += 1


 def show_social(args, image_t, output_path, annotations, dic_out):
     """Output frontal image with poses or combined with bird eye view"""

@@ -214,24 +115,17 @@ def show_social(args, image_t, output_path, annotations, dic_out):
     stds = dic_out['stds_ale']
     xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']]

+    # Prepare color for social distancing
+    colors = ['r' if social_interactions(idx, xz_centers, angles, dds,
+                                         stds=stds,
+                                         threshold_prob=args.threshold_prob,
+                                         threshold_dist=args.threshold_dist,
+                                         radii=args.radii)
+              else 'deepskyblue'
+              for idx, _ in enumerate(dic_out['xyz_pred'])]
+
+    # Draw keypoints and orientation
     if 'front' in args.output_types:

-        # Resize back the tensor image to its original dimensions
-        if not 0.99 < args.scale < 1.01:
-            size = (round(image_t.shape[0] / args.scale), round(image_t.shape[1] / args.scale))  # height width
-            image_t = image_t.permute(2, 0, 1).unsqueeze(0)  # batch x channels x height x width
-            image_t = F.interpolate(image_t, size=size).squeeze().permute(1, 2, 0)
-
-        # Prepare color for social distancing
-        colors = ['r' if social_interactions(idx, xz_centers, angles, dds,
-                                             stds=stds,
-                                             threshold_prob=args.threshold_prob,
-                                             threshold_dist=args.threshold_dist,
-                                             radii=args.radii)
-                  else 'deepskyblue'
-                  for idx, _ in enumerate(dic_out['xyz_pred'])]
-
-        # Draw keypoints and orientation
         keypoint_sets, scores = get_pifpaf_outputs(annotations)
         uv_centers = dic_out['uv_heads']
         sizes = [abs(dic_out['uv_heads'][idx][1] - uv_s[1]) / 1.5 for idx, uv_s in
@@ -247,20 +141,20 @@ def show_social(args, image_t, output_path, annotations, dic_out):
             draw_orientation(ax, uv_centers, sizes, angles, colors, mode='front')

     if 'bird' in args.output_types:
-        with bird_canvas(args, output_path) as ax1:
+        z_max = min(args.z_max, 4 + max([el[1] for el in xz_centers]))
+        with bird_canvas(output_path, z_max) as ax1:
             draw_orientation(ax1, xz_centers, [], angles, colors, mode='bird')
             draw_uncertainty(ax1, xz_centers, stds)


 def get_pifpaf_outputs(annotations):
+    # TODO extract direct from predictions with pifpaf 0.11+
     """Extract keypoints sets and scores from output dictionary"""
     if not annotations:
         return [], []
-    keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape(-1, 17, 3)
+    keypoints_sets = np.array([dic['keypoints'] for dic in annotations]).reshape((-1, 17, 3))
     score_weights = np.ones((keypoints_sets.shape[0], 17))
     score_weights[:, 3] = 3.0
-    # score_weights[:, 5:] = 0.1
-    # score_weights[:, -2:] = 0.0  # ears are not annotated
     score_weights /= np.sum(score_weights[0, :])
     kps_scores = keypoints_sets[:, :, 2]
     ordered_kps_scores = np.sort(kps_scores, axis=1)[:, ::-1]

@@ -269,14 +163,14 @@ def get_pifpaf_outputs(annotations):

 @contextmanager
-def bird_canvas(args, output_path):
+def bird_canvas(output_path, z_max):
     fig, ax = plt.subplots(1, 1)
     fig.set_tight_layout(True)
     output_path = output_path + '.bird.png'
-    x_max = args.z_max / 1.5
-    ax.plot([0, x_max], [0, args.z_max], 'k--')
-    ax.plot([0, -x_max], [0, args.z_max], 'k--')
-    ax.set_ylim(0, args.z_max + 1)
+    x_max = z_max / 1.5
+    ax.plot([0, x_max], [0, z_max], 'k--')
+    ax.plot([0, -x_max], [0, z_max], 'k--')
+    ax.set_ylim(0, z_max + 1)
     yield ax
     fig.savefig(output_path)
     plt.close(fig)
@@ -23,24 +23,28 @@ class ActivityEvaluator:

     def __init__(self, args):

+        self.dir_ann = args.dir_ann
+        assert self.dir_ann is not None and os.path.exists(self.dir_ann), \
+            "Annotation directory not provided / does not exist"
+        assert os.listdir(self.dir_ann), "Annotation directory is empty"
+
         # COLLECTIVE ACTIVITY DATASET (talking)
         # -------------------------------------------------------------------------------------------------------------
         if args.dataset == 'collective':
-            self.folders_collective = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
+            self.sequences = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
             # folders_collective = ['seq02']
-            self.path_collective = ['data/activity/' + fold for fold in self.folders_collective]
+            self.dir_data = 'data/activity/dataset'
             self.THRESHOLD_PROB = 0.25  # Concordance for samples
             self.THRESHOLD_DIST = 2  # Threshold to check distance of people
             self.RADII = (0.3, 0.5)  # expected radii of the o-space
-            self.PIFPAF_CONF = 0.4
+            self.PIFPAF_CONF = 0.3
             self.SOCIAL_DISTANCE = False
         # -------------------------------------------------------------------------------------------------------------

         # KITTI DATASET (social distancing)
         # ------------------------------------------------------------------------------------------------------------
         else:
-            self.dir_ann_kitti = '/data/lorenzo-data/annotations/kitti/scale_2_july'
-            self.dir_gt_kitti = 'data/kitti/gt_activity'
+            self.dir_data = 'data/kitti/gt_activity'
             self.dir_kk = os.path.join('data', 'kitti', 'calib')
             self.THRESHOLD_PROB = 0.25  # Concordance for samples
             self.THRESHOLD_DIST = 2  # Threshold to check distance of people

@@ -62,25 +66,25 @@ class ActivityEvaluator:
     def eval_collective(self):
         """Parse Collective Activity Dataset and predict if people are talking or not"""

-        for fold in self.path_collective:
-            images = glob.glob(fold + '/*.jpg')
-            initial_path = os.path.join(fold, 'frame0001.jpg')
-            with open(initial_path, 'rb') as f:
+        for seq in self.sequences:
+            images = glob.glob(os.path.join(self.dir_data, 'images', seq + '*.jpg'))
+            initial_im = os.path.join(self.dir_data, 'images', seq + '_frame0001.jpg')
+            with open(initial_im, 'rb') as f:
                 image = Image.open(f).convert('RGB')
                 im_size = image.size
+            assert len(im_size) > 1, "image with frame0001 not available"

             for idx, im_path in enumerate(images):

                 # Collect PifPaf files and calibration
                 basename = os.path.basename(im_path)
-                extension = '.pifpaf.json'
-                path_pif = os.path.join(fold, basename + extension)
+                extension = '.predictions.json'
+                path_pif = os.path.join(self.dir_ann, basename + extension)
                 annotations = open_annotations(path_pif)
                 kk, _ = factory_for_gt(im_size, verbose=False)

                 # Collect corresponding gt files (ys_gt: 1 or 0)
-                boxes_gt, ys_gt = parse_gt_collective(fold, path_pif)
+                boxes_gt, ys_gt = parse_gt_collective(self.dir_data, seq, path_pif)

                 # Run Monoloco
                 dic_out, boxes = self.run_monoloco(annotations, kk, im_size=im_size)

@@ -88,17 +92,19 @@ class ActivityEvaluator:
                 matches = get_iou_matches(boxes, boxes_gt, iou_min=0.3)

                 # Estimate activity
-                categories = [os.path.basename(fold)] * len(boxes_gt)
+                categories = [seq] * len(boxes_gt)  # for compatibility with KITTI evaluation
                 self.estimate_activity(dic_out, matches, ys_gt, categories=categories)

             # Print Results
-        cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.folders_collective)
+            acc = accuracy_score(self.all_gt[seq], self.all_pred[seq])
+            print(f"Accuracy of category {seq}: {100*acc:.2f}%")
+        cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.sequences)

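For reference, the matching above relies on the usual intersection-over-union criterion; a minimal sketch, assuming `[x1, y1, x2, y2]` boxes (the box format is an assumption, not taken from the repository):

```
# Minimal IoU sketch (illustrative; [x1, y1, x2, y2] box format assumed).
def box_iou(box_a, box_b):
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

# A prediction is matched to a ground-truth box only when box_iou(...) >= 0.3.
```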
     def eval_kitti(self):
         """Parse KITTI Dataset and predict if people are talking or not"""

         from ..utils import factory_file
-        files = glob.glob(self.dir_gt_kitti + '/*.txt')
+        files = glob.glob(self.dir_data + '/*.txt')
         # files = [self.dir_gt_kitti + '/001782.txt']
         assert files, "Empty directory"

@@ -107,10 +113,10 @@ class ActivityEvaluator:
             # Collect PifPaf files and calibration
             basename, _ = os.path.splitext(os.path.basename(file))
             path_calib = os.path.join(self.dir_kk, basename + '.txt')
-            annotations, kk, tt = factory_file(path_calib, self.dir_ann_kitti, basename)
+            annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)

             # Collect corresponding gt files (ys_gt: 1 or 0)
-            path_gt = os.path.join(self.dir_gt_kitti, basename + '.txt')
+            path_gt = os.path.join(self.dir_data, basename + '.txt')
             boxes_gt, ys_gt, difficulties = parse_gt_kitti(path_gt)

             # Run Monoloco

@@ -131,17 +137,16 @@ class ActivityEvaluator:
         angles = dic_out['angles']
         dds = dic_out['dds_pred']
         stds = dic_out['stds_ale']
-        confs = dic_out['confs']
         xz_centers = [[xx[0], xx[2]] for xx in dic_out['xyz_pred']]

-        # Count gt statistics
+        # Count gt statistics. (One element each gt)
         for key in categories:
             self.cnt['gt'][key] += 1
             self.cnt['gt']['all'] += 1

         for i_m, (idx, idx_gt) in enumerate(matches):

-            # Select keys to update resultd for Collective or KITTI
+            # Select keys to update results for Collective or KITTI
             keys = ('all', categories[idx_gt])

             # Run social interactions rule

@@ -166,10 +171,12 @@ class ActivityEvaluator:
         return dic_out, boxes


-def parse_gt_collective(fold, path_pif):
+def parse_gt_collective(dir_data, seq, path_pif):
     """Parse both gt and binary label (1/0) for talking or not"""

-    with open(os.path.join(fold, "annotations.txt"), "r") as ff:
+    path = os.path.join(dir_data, 'annotations', seq + '_annotations.txt')
+
+    with open(path, "r") as ff:
         reader = csv.reader(ff, delimiter='\t')
         dic_frames = defaultdict(lambda: defaultdict(list))
         for idx, line in enumerate(reader):

@@ -212,17 +219,21 @@ def cout_results(cnt, all_gt, all_pred, categories=()):
     # Split by folders for collective activity
     for key in categories:
         acc = accuracy_score(all_gt[key], all_pred[key])
-        print("Accuracy of category {}: {:.2f}% , Recall: {:.2f}%, #: {}, Predicted positive: {:.2f}%"
+        print("Accuracy of category {}: {:.2f}% , Recall: {:.2f}%, #: {}, Pred/Real positive: {:.1f}% / {:.1f}%"
               .format(key,
                       acc * 100,
                       cnt['pred'][key] / cnt['gt'][key]*100,
                       cnt['pred'][key],
-                      sum(all_gt[key]) / len(all_gt[key]) * 100))
+                      sum(all_pred[key]) / len(all_pred[key]) * 100,
+                      sum(all_gt[key]) / len(all_gt[key]) * 100
+                      )
+              )

     # Final Accuracy
     acc = accuracy_score(all_gt['all'], all_pred['all'])
+    recall = cnt['pred']['all'] / cnt['gt']['all'] * 100  # only predictions that match a ground-truth are included
     print('-' * 80)
-    print("Final Accuracy: {:.2f}%".format(acc * 100))
+    print(f"Final Accuracy: {acc * 100:.2f} Final Recall:{recall:.2f}")
     print('-' * 80)

@@ -244,8 +255,8 @@ def convert_category(cat):
 def extract_frame_number(path):
     """extract frame number from path"""
     name = os.path.basename(path)
-    if name[5] == '0':
-        frame = name[6:9]
+    if name[11] == '0':
+        frame = name[12:15]
     else:
-        frame = name[5:9]
+        frame = name[11:15]
     return frame
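The index shift in `extract_frame_number` follows from the new file-naming convention; a small illustration with a hypothetical name (two-digit sequence ids assumed, as in the sequences listed earlier):

```
# Why the indices moved from name[5:9] to name[11:15] (illustrative only):
# images are now named seq<id>_frame<nnnn>.jpg, so the frame digits start
# six characters later than in the old frame<nnnn>.jpg names.
name = "seq14_frame0038.jpg"    # hypothetical example
assert name[11] == '0'          # first digit of the frame number
assert name[12:15] == '038'     # frame number without the leading zero
assert name[11:15] == '0038'    # full four-digit frame number
```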
@ -25,41 +25,58 @@ class EvalKitti:
|
|||||||
'27', '29', '31', '49')
|
'27', '29', '31', '49')
|
||||||
ALP_THRESHOLDS = ('<0.5m', '<1m', '<2m')
|
ALP_THRESHOLDS = ('<0.5m', '<1m', '<2m')
|
||||||
OUR_METHODS = ['geometric', 'monoloco', 'monoloco_pp', 'pose', 'reid', 'monstereo']
|
OUR_METHODS = ['geometric', 'monoloco', 'monoloco_pp', 'pose', 'reid', 'monstereo']
|
||||||
METHODS_MONO = ['m3d', 'monopsr']
|
METHODS_MONO = ['m3d', 'monopsr', 'smoke', 'monodis']
|
||||||
METHODS_STEREO = ['3dop', 'psf', 'pseudo-lidar', 'e2e', 'oc-stereo']
|
METHODS_STEREO = ['3dop', 'psf', 'pseudo-lidar', 'e2e', 'oc-stereo']
|
||||||
BASELINES = ['task_error', 'pixel_error']
|
BASELINES = ['task_error', 'pixel_error']
|
||||||
HEADERS = ('method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all')
|
HEADERS = ('method', '<0.5', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all')
|
||||||
CATEGORIES = ('pedestrian',)
|
CATEGORIES = ('pedestrian',)
|
||||||
|
methods = OUR_METHODS + METHODS_MONO + METHODS_STEREO
|
||||||
|
|
||||||
def __init__(self, thresh_iou_monoloco=0.3, thresh_iou_base=0.3, thresh_conf_monoloco=0.2, thresh_conf_base=0.5,
|
# Set directories
|
||||||
verbose=False):
|
main_dir = os.path.join('data', 'kitti')
|
||||||
|
dir_gt = os.path.join(main_dir, 'gt')
|
||||||
|
path_train = os.path.join('splits', 'kitti_train.txt')
|
||||||
|
path_val = os.path.join('splits', 'kitti_val.txt')
|
||||||
|
dir_logs = os.path.join('data', 'logs')
|
||||||
|
assert os.path.exists(dir_logs), "No directory to save final statistics"
|
||||||
|
dir_fig = os.path.join('data', 'figures')
|
||||||
|
assert os.path.exists(dir_logs), "No directory to save figures"
|
||||||
|
|
||||||
self.main_dir = os.path.join('data', 'kitti')
|
# Set thresholds to obtain comparable recalls
|
||||||
self.dir_gt = os.path.join(self.main_dir, 'gt')
|
thresh_iou_monoloco = 0.3
|
||||||
self.methods = self.OUR_METHODS + self.METHODS_MONO + self.METHODS_STEREO
|
thresh_iou_base = 0.3
|
||||||
path_train = os.path.join('splits', 'kitti_train.txt')
|
thresh_conf_monoloco = 0.2
|
||||||
path_val = os.path.join('splits', 'kitti_val.txt')
|
thresh_conf_base = 0.5
|
||||||
dir_logs = os.path.join('data', 'logs')
|
|
||||||
assert dir_logs, "No directory to save final statistics"
|
def __init__(self, args):
|
||||||
|
|
||||||
|
self.verbose = args.verbose
|
||||||
|
self.net = args.net
|
||||||
|
self.save = args.save
|
||||||
|
self.show = args.show
|
||||||
|
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
now_time = now.strftime("%Y%m%d-%H%M")[2:]
|
now_time = now.strftime("%Y%m%d-%H%M")[2:]
|
||||||
self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json')
|
self.path_results = os.path.join(self.dir_logs, 'eval-' + now_time + '.json')
|
||||||
self.verbose = verbose
|
|
||||||
|
|
||||||
self.dic_thresh_iou = {method: (thresh_iou_monoloco if method in self.OUR_METHODS
|
# Set thresholds for comparable recalls
|
||||||
else thresh_iou_base)
|
self.dic_thresh_iou = {method: (self.thresh_iou_monoloco if method in self.OUR_METHODS
|
||||||
|
else self.thresh_iou_base)
|
||||||
for method in self.methods}
|
for method in self.methods}
|
||||||
self.dic_thresh_conf = {method: (thresh_conf_monoloco if method in self.OUR_METHODS
|
self.dic_thresh_conf = {method: (self.thresh_conf_monoloco if method in self.OUR_METHODS
|
||||||
else thresh_conf_base)
|
else self.thresh_conf_base)
|
||||||
for method in self.methods}
|
for method in self.methods}
|
||||||
self.dic_thresh_conf['monopsr'] += 0.3
|
|
||||||
self.dic_thresh_conf['e2e-pl'] = -100 # They don't have enough detections
|
# Set thresholds to obtain comparable recall
|
||||||
|
self.dic_thresh_conf['monopsr'] += 0.4
|
||||||
|
self.dic_thresh_conf['e2e-pl'] = -100
|
||||||
self.dic_thresh_conf['oc-stereo'] = -100
|
self.dic_thresh_conf['oc-stereo'] = -100
|
||||||
|
self.dic_thresh_conf['smoke'] = -100
|
||||||
|
self.dic_thresh_conf['monodis'] = -100
|
||||||
|
|
||||||
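The -100 overrides above effectively switch off the confidence filter for methods that output fewer detections, so all methods are compared at a similar recall. A self-contained restatement of how the dictionary ends up (values copied from the class attributes; names kept as in the file):

OUR_METHODS = ['geometric', 'monoloco', 'monoloco_pp', 'pose', 'reid', 'monstereo']
methods = OUR_METHODS + ['m3d', 'monopsr', 'smoke', 'monodis', '3dop', 'psf', 'pseudo-lidar', 'e2e', 'oc-stereo']
thresh_conf = {m: (0.2 if m in OUR_METHODS else 0.5) for m in methods}  # thresh_conf_monoloco / thresh_conf_base
thresh_conf['monopsr'] += 0.4
for weak in ('e2e-pl', 'oc-stereo', 'smoke', 'monodis'):
    thresh_conf[weak] = -100  # effectively no confidence filtering, to keep recall comparable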
# Extract validation images for evaluation
|
# Extract validation images for evaluation
|
||||||
names_gt = tuple(os.listdir(self.dir_gt))
|
names_gt = tuple(os.listdir(self.dir_gt))
|
||||||
_, self.set_val = split_training(names_gt, path_train, path_val)
|
_, self.set_val = split_training(names_gt, self.path_train, self.path_val)
|
||||||
|
|
||||||
# self.set_val = ('002282.txt', )
|
# self.set_val = ('002282.txt', )
|
||||||
|
|
||||||
@ -68,10 +85,13 @@ class EvalKitti:
|
|||||||
= None
|
= None
|
||||||
self.cnt = 0
|
self.cnt = 0
|
||||||
|
|
||||||
|
# Filter methods with empty or non existent directory
|
||||||
|
filter_directories(self.main_dir, self.methods)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""Evaluate Monoloco performances on ALP and ALE metrics"""
|
"""Evaluate Monoloco performances on ALP and ALE metrics"""
|
||||||
for self.category in self.CATEGORIES:
|
|
||||||
|
|
||||||
|
for self.category in self.CATEGORIES:
|
||||||
# Initialize variables
|
# Initialize variables
|
||||||
self.errors = defaultdict(lambda: defaultdict(list))
|
self.errors = defaultdict(lambda: defaultdict(list))
|
||||||
self.dic_stds = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
|
self.dic_stds = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
|
||||||
@ -90,7 +110,7 @@ class EvalKitti:
|
|||||||
methods_out = defaultdict(tuple) # Save all methods for comparison
|
methods_out = defaultdict(tuple) # Save all methods for comparison
|
||||||
|
|
||||||
# Count ground_truth:
|
# Count ground_truth:
|
||||||
boxes_gt, ys, truncs_gt, occs_gt = out_gt
|
boxes_gt, ys, truncs_gt, occs_gt = out_gt # pylint: disable=unbalanced-tuple-unpacking
|
||||||
for idx, box in enumerate(boxes_gt):
|
for idx, box in enumerate(boxes_gt):
|
||||||
mode = get_difficulty(box, truncs_gt[idx], occs_gt[idx])
|
mode = get_difficulty(box, truncs_gt[idx], occs_gt[idx])
|
||||||
self.cnt_gt[mode] += 1
|
self.cnt_gt[mode] += 1
|
||||||
@ -100,7 +120,6 @@ class EvalKitti:
|
|||||||
for method in self.methods:
|
for method in self.methods:
|
||||||
# Extract annotations
|
# Extract annotations
|
||||||
dir_method = os.path.join(self.main_dir, method)
|
dir_method = os.path.join(self.main_dir, method)
|
||||||
assert os.path.exists(dir_method), "directory of the method %s does not exists" % method
|
|
||||||
path_method = os.path.join(dir_method, name)
|
path_method = os.path.join(dir_method, name)
|
||||||
methods_out[method] = self._parse_txts(path_method, method=method)
|
methods_out[method] = self._parse_txts(path_method, method=method)
|
||||||
|
|
||||||
@ -124,12 +143,14 @@ class EvalKitti:
|
|||||||
print('\n' + self.category.upper() + ':')
|
print('\n' + self.category.upper() + ':')
|
||||||
self.show_statistics()
|
self.show_statistics()
|
||||||
|
|
||||||
def printer(self, show, save):
|
def printer(self):
|
||||||
if save or show:
|
if self.save or self.show:
|
||||||
show_results(self.dic_stats, self.CLUSTERS, show=show, save=save)
|
show_results(self.dic_stats, self.CLUSTERS, self.net, self.dir_fig, show=self.show, save=self.save)
|
||||||
show_spread(self.dic_stats, self.CLUSTERS, show=show, save=save)
|
show_spread(self.dic_stats, self.CLUSTERS, self.net, self.dir_fig, show=self.show, save=self.save)
|
||||||
show_box_plot(self.errors, self.CLUSTERS, show=show, save=save)
|
if self.net == 'monstereo':
|
||||||
show_task_error(show=show, save=save)
|
show_box_plot(self.errors, self.CLUSTERS, self.dir_fig, show=self.show, save=self.save)
|
||||||
|
else:
|
||||||
|
show_task_error(self.dir_fig, show=self.show, save=self.save)
|
||||||
|
|
||||||
def _parse_txts(self, path, method):
|
def _parse_txts(self, path, method):
|
||||||
|
|
||||||
@ -352,7 +373,7 @@ class EvalKitti:
|
|||||||
self.name = name
|
self.name = name
|
||||||
# Iterate over each line of the gt file and save box location and distances
|
# Iterate over each line of the gt file and save box location and distances
|
||||||
out_gt = parse_ground_truth(path_gt, 'pedestrian')
|
out_gt = parse_ground_truth(path_gt, 'pedestrian')
|
||||||
boxes_gt, ys, truncs_gt, occs_gt = out_gt
|
boxes_gt, ys, truncs_gt, occs_gt = out_gt # pylint: disable=unbalanced-tuple-unpacking
|
||||||
for label in ys:
|
for label in ys:
|
||||||
heights.append(label[4])
|
heights.append(label[4])
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -430,3 +451,14 @@ def extract_indices(idx_to_check, *args):
|
|||||||
def average(my_list):
|
def average(my_list):
|
||||||
"""calculate mean of a list"""
|
"""calculate mean of a list"""
|
||||||
return sum(my_list) / len(my_list)
|
return sum(my_list) / len(my_list)
|
||||||
|
|
||||||
|
|
||||||
|
def filter_directories(main_dir, methods):
|
||||||
|
for method in list(methods):  # iterate over a copy: entries are removed from methods below
|
||||||
|
dir_method = os.path.join(main_dir, method)
|
||||||
|
if not os.path.exists(dir_method):
|
||||||
|
methods.remove(method)
|
||||||
|
print(f"\nMethod {method}. No directory found. Skipping it..")
|
||||||
|
elif not os.listdir(dir_method):
|
||||||
|
methods.remove(method)
|
||||||
|
print(f"\nMethod {method}. Directory is empty. Skipping it..")
|
||||||
|
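A minimal usage sketch of the refactored evaluation entry points; the package import path and the exact Namespace fields are assumptions based on this diff, not verified:

import argparse
from monstereo.eval import EvalKitti  # package name assumed from the repository

args = argparse.Namespace(net='monoloco_pp', verbose=False, save=True, show=False)
kitti_eval = EvalKitti(args)   # skips methods whose data/kitti/<method> folder is missing or empty
kitti_eval.run()               # accumulates ALP / ALE statistics over the KITTI validation split
kitti_eval.printer()           # writes figures to data/figures when save is True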
|||||||
@ -1,5 +1,4 @@
|
|||||||
|
# pylint: disable=too-many-statements,too-many-branches,cyclic-import
|
||||||
# pylint: disable=too-many-statements,cyclic-import, too-many-branches
|
|
||||||
|
|
||||||
"""Joints Analysis: Supplementary material of MonStereo"""
|
"""Joints Analysis: Supplementary material of MonStereo"""
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
#pylint: disable=too-many-branches
|
# pylint: disable=too-many-branches
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Run MonoLoco/MonStereo and converts annotations into KITTI format
|
Run MonoLoco/MonStereo and converts annotations into KITTI format
|
||||||
@ -22,39 +22,35 @@ from .reid_baseline import get_reid_features, ReID
|
|||||||
|
|
||||||
class GenerateKitti:
|
class GenerateKitti:
|
||||||
|
|
||||||
METHODS = ['monstereo', 'monoloco_pp', 'monoloco', 'geometric']
|
dir_gt = os.path.join('data', 'kitti', 'gt')
|
||||||
|
dir_gt_new = os.path.join('data', 'kitti', 'gt_new')
|
||||||
|
dir_kk = os.path.join('data', 'kitti', 'calib')
|
||||||
|
dir_byc = '/data/lorenzo-data/kitti/object_detection/left'
|
||||||
|
monoloco_checkpoint = 'data/models/monoloco-190717-0952.pkl'
|
||||||
|
baselines = {'mono': [], 'stereo': []}
|
||||||
|
|
||||||
def __init__(self, model, dir_ann, p_dropout=0.2, n_dropout=0, hidden_size=1024):
|
def __init__(self, args):
|
||||||
|
|
||||||
|
# Load Network
|
||||||
|
self.net = args.net
|
||||||
|
assert args.net in ('monstereo', 'monoloco_pp'), "net not recognized"
|
||||||
|
|
||||||
# Load monoloco
|
|
||||||
use_cuda = torch.cuda.is_available()
|
use_cuda = torch.cuda.is_available()
|
||||||
device = torch.device("cuda" if use_cuda else "cpu")
|
device = torch.device("cuda" if use_cuda else "cpu")
|
||||||
|
self.model = Loco(
|
||||||
if 'monstereo' in self.METHODS:
|
model=args.model,
|
||||||
self.monstereo = Loco(model=model, net='monstereo', device=device, n_dropout=n_dropout, p_dropout=p_dropout,
|
net=args.net,
|
||||||
linear_size=hidden_size)
|
device=device,
|
||||||
# model_mono_pp = 'data/models/monoloco-191122-1122.pkl' # KITTI_p
|
n_dropout=args.n_dropout,
|
||||||
# model_mono_pp = 'data/models/monoloco-191018-1459.pkl' # nuScenes_p
|
p_dropout=args.dropout,
|
||||||
model_mono_pp = 'data/models/stereoloco-200604-0949.pkl' # KITTI_pp
|
linear_size=args.hidden_size
|
||||||
# model_mono_pp = 'data/models/stereoloco-200608-1550.pkl' # nuScenes_pp
|
)
|
||||||
|
|
||||||
if 'monoloco_pp' in self.METHODS:
|
|
||||||
self.monoloco_pp = Loco(model=model_mono_pp, net='monoloco_pp', device=device, n_dropout=n_dropout,
|
|
||||||
p_dropout=p_dropout)
|
|
||||||
|
|
||||||
if 'monoloco' in self.METHODS:
|
|
||||||
model_mono = 'data/models/monoloco-190717-0952.pkl' # KITTI
|
|
||||||
# model_mono = 'data/models/monoloco-190719-0923.pkl' # NuScenes
|
|
||||||
self.monoloco = Loco(model=model_mono, net='monoloco', device=device, n_dropout=n_dropout,
|
|
||||||
p_dropout=p_dropout, linear_size=256)
|
|
||||||
self.dir_ann = dir_ann
|
|
||||||
|
|
||||||
# Extract list of pifpaf files in validation images
|
# Extract list of pifpaf files in validation images
|
||||||
self.dir_gt = os.path.join('data', 'kitti', 'gt')
|
self.dir_ann = args.dir_ann
|
||||||
self.dir_gt_new = os.path.join('data', 'kitti', 'gt_new')
|
self.generate_official = args.generate_official
|
||||||
self.set_basename = factory_basename(dir_ann, self.dir_gt)
|
assert os.listdir(self.dir_ann), "Annotation directory is empty"
|
||||||
self.dir_kk = os.path.join('data', 'kitti', 'calib')
|
self.set_basename = factory_basename(args.dir_ann, self.dir_gt)
|
||||||
self.dir_byc = '/data/lorenzo-data/kitti/object_detection/left'
|
|
||||||
|
|
||||||
# For quick testing
|
# For quick testing
|
||||||
# ------------------------------------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------------------------------------
|
||||||
@ -62,33 +58,48 @@ class GenerateKitti:
|
|||||||
# self.set_basename = ('002282',)
|
# self.set_basename = ('002282',)
|
||||||
# ------------------------------------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
# Calculate stereo baselines
|
# Add monocular and stereo baselines (they require monoloco as backbone)
|
||||||
# self.baselines = ['pose', 'reid']
|
if args.baselines:
|
||||||
self.baselines = []
|
|
||||||
self.cnt_disparity = defaultdict(int)
|
# Load MonoLoco
|
||||||
self.cnt_no_stereo = 0
|
self.baselines['mono'] = ['monoloco', 'geometric']
|
||||||
self.dir_images = os.path.join('data', 'kitti', 'images')
|
self.monoloco = Loco(
|
||||||
self.dir_images_r = os.path.join('data', 'kitti', 'images_r')
|
model=self.monoloco_checkpoint,
|
||||||
# ReID Baseline
|
net='monoloco',
|
||||||
if 'reid' in self.baselines:
|
device=device,
|
||||||
weights_path = 'data/models/reid_model_market.pkl'
|
n_dropout=args.n_dropout,
|
||||||
self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128)
|
p_dropout=args.dropout,
|
||||||
|
linear_size=256
|
||||||
|
)
|
||||||
|
# Stereo baselines
|
||||||
|
if args.net == 'monstereo':
|
||||||
|
self.baselines['stereo'] = ['pose', 'reid']
|
||||||
|
self.cnt_disparity = defaultdict(int)
|
||||||
|
self.cnt_no_stereo = 0
|
||||||
|
self.dir_images = os.path.join('data', 'kitti', 'images')
|
||||||
|
self.dir_images_r = os.path.join('data', 'kitti', 'images_r')
|
||||||
|
|
||||||
|
# ReID Baseline
|
||||||
|
weights_path = 'data/models/reid_model_market.pkl'
|
||||||
|
self.reid_net = ReID(weights_path=weights_path, device=device, num_classes=751, height=256, width=128)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""Run Monoloco and save txt files for KITTI evaluation"""
|
"""Run Monoloco and save txt files for KITTI evaluation"""
|
||||||
|
|
||||||
cnt_ann = cnt_file = cnt_no_file = 0
|
cnt_ann = cnt_file = cnt_no_file = 0
|
||||||
dir_out = {key: os.path.join('data', 'kitti', key) for key in self.METHODS}
|
|
||||||
print("\n")
|
|
||||||
for key in self.METHODS:
|
|
||||||
make_new_directory(dir_out[key])
|
|
||||||
|
|
||||||
for key in self.baselines:
|
# Prepare empty folder
|
||||||
dir_out[key] = os.path.join('data', 'kitti', key)
|
di = os.path.join('data', 'kitti', self.net)
|
||||||
make_new_directory(dir_out[key])
|
make_new_directory(di)
|
||||||
print("Created empty output directory for {}".format(key))
|
dir_out = {self.net: di}
|
||||||
|
|
||||||
# Run monoloco over the list of images
|
for mode, names in self.baselines.items():
|
||||||
|
for name in names:
|
||||||
|
di = os.path.join('data', 'kitti', name)
|
||||||
|
make_new_directory(di)
|
||||||
|
dir_out[name] = di
|
||||||
|
|
||||||
|
# Run the model
|
||||||
for basename in self.set_basename:
|
for basename in self.set_basename:
|
||||||
path_calib = os.path.join(self.dir_kk, basename + '.txt')
|
path_calib = os.path.join(self.dir_kk, basename + '.txt')
|
||||||
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
|
annotations, kk, tt = factory_file(path_calib, self.dir_ann, basename)
|
||||||
@ -98,58 +109,60 @@ class GenerateKitti:
|
|||||||
annotations_r, _, _ = factory_file(path_calib, self.dir_ann, basename, mode='right')
|
annotations_r, _, _ = factory_file(path_calib, self.dir_ann, basename, mode='right')
|
||||||
_, keypoints_r = preprocess_pifpaf(annotations_r, im_size=(1242, 374))
|
_, keypoints_r = preprocess_pifpaf(annotations_r, im_size=(1242, 374))
|
||||||
|
|
||||||
|
if self.net == 'monstereo':
|
||||||
|
dic_out = self.model.forward(keypoints, kk, keypoints_r=keypoints_r)
|
||||||
|
elif self.net == 'monoloco_pp':
|
||||||
|
dic_out = self.model.forward(keypoints, kk)
|
||||||
|
|
||||||
|
all_outputs = {self.net: [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
|
||||||
|
dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]}
|
||||||
|
zzs = [float(el[2]) for el in dic_out['xyzd']]
|
||||||
|
|
||||||
|
# Save txt files
|
||||||
|
params = [kk, tt]
|
||||||
|
path_txt = os.path.join(dir_out[self.net], basename + '.txt')
|
||||||
|
save_txts(path_txt, boxes, all_outputs[self.net], params, mode=self.net, cat=cat)
|
||||||
cnt_ann += len(boxes)
|
cnt_ann += len(boxes)
|
||||||
cnt_file += 1
|
cnt_file += 1
|
||||||
all_inputs, all_outputs = {}, {}
|
|
||||||
|
|
||||||
# STEREOLOCO
|
# MONO (+ STEREO BASELINES)
|
||||||
dic_out = self.monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
|
if self.baselines['mono']:
|
||||||
all_outputs['monstereo'] = [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
|
# MONOLOCO
|
||||||
dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]
|
|
||||||
|
|
||||||
# MONOLOCO++
|
|
||||||
if 'monoloco_pp' in self.METHODS:
|
|
||||||
dic_out = self.monoloco_pp.forward(keypoints, kk)
|
|
||||||
all_outputs['monoloco_pp'] = [dic_out['xyzd'], dic_out['bi'], dic_out['epi'],
|
|
||||||
dic_out['yaw'], dic_out['h'], dic_out['w'], dic_out['l']]
|
|
||||||
zzs = [float(el[2]) for el in dic_out['xyzd']]
|
|
||||||
|
|
||||||
# MONOLOCO
|
|
||||||
if 'monoloco' in self.METHODS:
|
|
||||||
dic_out = self.monoloco.forward(keypoints, kk)
|
dic_out = self.monoloco.forward(keypoints, kk)
|
||||||
zzs_geom, xy_centers = geometric_coordinates(keypoints, kk, average_y=0.48)
|
zzs_geom, xy_centers = geometric_coordinates(keypoints, kk, average_y=0.48)
|
||||||
all_outputs['monoloco'] = [dic_out['d'], dic_out['bi'], dic_out['epi']] + [zzs_geom, xy_centers]
|
all_outputs['monoloco'] = [dic_out['d'], dic_out['bi'], dic_out['epi']] + [zzs_geom, xy_centers]
|
||||||
all_outputs['geometric'] = all_outputs['monoloco']
|
all_outputs['geometric'] = all_outputs['monoloco']
|
||||||
|
|
||||||
params = [kk, tt]
|
# monocular baselines
|
||||||
|
for key in self.baselines['mono']:
|
||||||
|
path_txt = {key: os.path.join(dir_out[key], basename + '.txt')}
|
||||||
|
save_txts(path_txt[key], boxes, all_outputs[key], params, mode=key, cat=cat)
|
||||||
|
|
||||||
for key in self.METHODS:
|
# stereo baselines
|
||||||
path_txt = {key: os.path.join(dir_out[key], basename + '.txt')}
|
if self.baselines['stereo']:
|
||||||
save_txts(path_txt[key], boxes, all_outputs[key], params, mode=key, cat=cat)
|
all_inputs = {}
|
||||||
|
dic_xyz = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib)
|
||||||
|
for key in dic_xyz:
|
||||||
|
all_outputs[key] = all_outputs['monoloco'].copy()
|
||||||
|
all_outputs[key][0] = dic_xyz[key]
|
||||||
|
all_inputs[key] = boxes
|
||||||
|
|
||||||
# STEREO BASELINES
|
path_txt[key] = os.path.join(dir_out[key], basename + '.txt')
|
||||||
if self.baselines:
|
save_txts(path_txt[key], all_inputs[key], all_outputs[key], params,
|
||||||
dic_xyz = self._run_stereo_baselines(basename, boxes, keypoints, zzs, path_calib)
|
mode='baseline',
|
||||||
|
cat=cat)
|
||||||
for key in dic_xyz:
|
|
||||||
all_outputs[key] = all_outputs['monoloco'].copy()
|
|
||||||
all_outputs[key][0] = dic_xyz[key]
|
|
||||||
all_inputs[key] = boxes
|
|
||||||
|
|
||||||
path_txt[key] = os.path.join(dir_out[key], basename + '.txt')
|
|
||||||
save_txts(path_txt[key], all_inputs[key], all_outputs[key], params, mode='baseline', cat=cat)
|
|
||||||
|
|
||||||
print("\nSaved in {} txt {} annotations. Not found {} images".format(cnt_file, cnt_ann, cnt_no_file))
|
print("\nSaved in {} txt {} annotations. Not found {} images".format(cnt_file, cnt_ann, cnt_no_file))
|
||||||
|
|
||||||
if 'monstereo' in self.METHODS:
|
if self.net == 'monstereo':
|
||||||
print("STEREO:")
|
print("STEREO:")
|
||||||
for key in self.baselines:
|
for key in self.baselines['stereo']:
|
||||||
print("Annotations corrected using {} baseline: {:.1f}%".format(
|
print("Annotations corrected using {} baseline: {:.1f}%".format(
|
||||||
key, self.cnt_disparity[key] / cnt_ann * 100))
|
key, self.cnt_disparity[key] / cnt_ann * 100))
|
||||||
print("Maximum possible stereo associations: {:.1f}%".format(self.cnt_disparity['max'] / cnt_ann * 100))
|
|
||||||
print("Not found {}/{} stereo files".format(self.cnt_no_stereo, cnt_file))
|
print("Not found {}/{} stereo files".format(self.cnt_no_stereo, cnt_file))
|
||||||
|
|
||||||
create_empty_files(dir_out) # Create empty files for official evaluation
|
if self.generate_official:
|
||||||
|
create_empty_files(dir_out, self.net) # Create empty files for official evaluation
|
||||||
|
|
||||||
def _run_stereo_baselines(self, basename, boxes, keypoints, zzs, path_calib):
|
def _run_stereo_baselines(self, basename, boxes, keypoints, zzs, path_calib):
|
||||||
|
|
||||||
@ -165,14 +178,14 @@ class GenerateKitti:
|
|||||||
path_image = os.path.join(self.dir_images, basename + '.png')
|
path_image = os.path.join(self.dir_images, basename + '.png')
|
||||||
path_image_r = os.path.join(self.dir_images_r, basename + '.png')
|
path_image_r = os.path.join(self.dir_images_r, basename + '.png')
|
||||||
reid_features = get_reid_features(self.reid_net, boxes, boxes_r, path_image, path_image_r)
|
reid_features = get_reid_features(self.reid_net, boxes, boxes_r, path_image, path_image_r)
|
||||||
dic_zzs, cnt = baselines_association(self.baselines, zzs, keypoints, keypoints_r, reid_features)
|
dic_zzs, cnt = baselines_association(self.baselines['stereo'], zzs, keypoints, keypoints_r, reid_features)
|
||||||
|
|
||||||
for key in cnt:
|
for key in cnt:
|
||||||
self.cnt_disparity[key] += cnt[key]
|
self.cnt_disparity[key] += cnt[key]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.cnt_no_stereo += 1
|
self.cnt_no_stereo += 1
|
||||||
dic_zzs = {key: zzs for key in self.baselines}
|
dic_zzs = {key: zzs for key in self.baselines['stereo']}
|
||||||
|
|
||||||
# Combine the stereo zz with x, y from 2D detection (no MonoLoco involved)
|
# Combine the stereo zz with x, y from 2D detection (no MonoLoco involved)
|
||||||
dic_xyz = defaultdict(list)
|
dic_xyz = defaultdict(list)
|
||||||
@ -227,8 +240,9 @@ def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco', ca
|
|||||||
conf_scale = 0.03
|
conf_scale = 0.03
|
||||||
elif mode == 'monoloco_pp':
|
elif mode == 'monoloco_pp':
|
||||||
conf_scale = 0.033
|
conf_scale = 0.033
|
||||||
|
# conf_scale = 0.035 # nuScenes for having same recall
|
||||||
else:
|
else:
|
||||||
conf_scale = 0.055
|
conf_scale = 0.05
|
||||||
conf = conf_scale * (uv_box[-1]) / (bi / math.sqrt(xx ** 2 + yy ** 2 + zz ** 2))
|
conf = conf_scale * (uv_box[-1]) / (bi / math.sqrt(xx ** 2 + yy ** 2 + zz ** 2))
|
||||||
|
|
||||||
output_list = [alpha] + uv_box[:-1] + hwl + cam_0 + [ry, conf, bi, epi]
|
output_list = [alpha] + uv_box[:-1] + hwl + cam_0 + [ry, conf, bi, epi]
|
||||||
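The pseudo-score written above is only a ranking heuristic: it is proportional to uv_box[-1] times the ratio distance / bi, so detections whose predicted uncertainty bi is small relative to their distance come first. A self-contained restatement (the helper name is illustrative):

import math

def pseudo_confidence(uv_box_last, bi, xyz, conf_scale=0.033):
    # conf_scale * uv_box[-1] / (bi / sqrt(x^2 + y^2 + z^2)), as in save_txts above
    distance = math.sqrt(xyz[0] ** 2 + xyz[1] ** 2 + xyz[2] ** 2)
    return conf_scale * uv_box_last * distance / bi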
@ -244,11 +258,10 @@ def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco', ca
|
|||||||
ff.write("\n")
|
ff.write("\n")
|
||||||
|
|
||||||
|
|
||||||
def create_empty_files(dir_out):
|
def create_empty_files(dir_out, net):
|
||||||
"""Create empty txt files to run official kitti metrics on MonStereo and all other methods"""
|
"""Create empty txt files to run official kitti metrics on MonStereo and all other methods"""
|
||||||
|
|
||||||
methods = ['pseudo-lidar', 'monopsr', '3dop', 'm3d', 'oc-stereo', 'e2e']
|
methods = ['pseudo-lidar', 'monopsr', '3dop', 'm3d', 'oc-stereo', 'e2e', 'monodis', 'smoke']
|
||||||
methods = []
|
|
||||||
dirs = [os.path.join('data', 'kitti', method) for method in methods]
|
dirs = [os.path.join('data', 'kitti', method) for method in methods]
|
||||||
dirs_orig = [os.path.join('data', 'kitti', method + '-orig') for method in methods]
|
dirs_orig = [os.path.join('data', 'kitti', method + '-orig') for method in methods]
|
||||||
|
|
||||||
@ -263,8 +276,7 @@ def create_empty_files(dir_out):
|
|||||||
# If the file exists, rewrite in new folder, otherwise create empty file
|
# If the file exists, rewrite in new folder, otherwise create empty file
|
||||||
read_and_rewrite(path_orig, path)
|
read_and_rewrite(path_orig, path)
|
||||||
|
|
||||||
for method in ('monoloco_pp', 'monstereo'):
|
for i in range(7481):
|
||||||
for i in range(7481):
|
name = "0" * (6 - len(str(i))) + str(i) + '.txt'
|
||||||
name = "0" * (6 - len(str(i))) + str(i) + '.txt'
|
ff = open(os.path.join(dir_out[net], name), "a+")
|
||||||
ff = open(os.path.join(dir_out[method], name), "a+")
|
ff.close()
|
||||||
ff.close()
|
|
||||||
|
|||||||
@ -29,7 +29,7 @@ def get_reid_features(reid_net, boxes, boxes_r, path_image, path_image_r):
|
|||||||
|
|
||||||
class ReID(object):
|
class ReID(object):
|
||||||
def __init__(self, weights_path, device, num_classes=751, height=256, width=128):
|
def __init__(self, weights_path, device, num_classes=751, height=256, width=128):
|
||||||
super(ReID, self).__init__()
|
super().__init__()
|
||||||
torch.manual_seed(1)
|
torch.manual_seed(1)
|
||||||
self.device = device
|
self.device = device
|
||||||
|
|
||||||
@ -90,7 +90,7 @@ class ReID(object):
|
|||||||
|
|
||||||
class ResNet50(nn.Module):
|
class ResNet50(nn.Module):
|
||||||
def __init__(self, num_classes, loss):
|
def __init__(self, num_classes, loss):
|
||||||
super(ResNet50, self).__init__()
|
super().__init__()
|
||||||
self.loss = loss
|
self.loss = loss
|
||||||
resnet50 = torchvision.models.resnet50(pretrained=True)
|
resnet50 = torchvision.models.resnet50(pretrained=True)
|
||||||
self.base = nn.Sequential(*list(resnet50.children())[:-2])
|
self.base = nn.Sequential(*list(resnet50.children())[:-2])
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
from .net import Loco
|
from .net import Loco
|
||||||
from .pifpaf import PifPaf, ImageList
|
|
||||||
from .process import unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux
|
from .process import unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import torch.nn as nn
|
|||||||
class MonStereoModel(nn.Module):
|
class MonStereoModel(nn.Module):
|
||||||
|
|
||||||
def __init__(self, input_size, output_size=2, linear_size=512, p_dropout=0.2, num_stage=3, device='cuda'):
|
def __init__(self, input_size, output_size=2, linear_size=512, p_dropout=0.2, num_stage=3, device='cuda'):
|
||||||
super(MonStereoModel, self).__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.num_stage = num_stage
|
self.num_stage = num_stage
|
||||||
self.stereo_size = input_size
|
self.stereo_size = input_size
|
||||||
@ -73,7 +73,7 @@ class MonStereoModel(nn.Module):
|
|||||||
|
|
||||||
class MyLinearSimple(nn.Module):
|
class MyLinearSimple(nn.Module):
|
||||||
def __init__(self, linear_size, p_dropout=0.5):
|
def __init__(self, linear_size, p_dropout=0.5):
|
||||||
super(MyLinearSimple, self).__init__()
|
super().__init__()
|
||||||
self.l_size = linear_size
|
self.l_size = linear_size
|
||||||
|
|
||||||
self.relu = nn.ReLU(inplace=True)
|
self.relu = nn.ReLU(inplace=True)
|
||||||
@ -109,7 +109,7 @@ class MonolocoModel(nn.Module):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, input_size, output_size=2, linear_size=256, p_dropout=0.2, num_stage=3):
|
def __init__(self, input_size, output_size=2, linear_size=256, p_dropout=0.2, num_stage=3):
|
||||||
super(MonolocoModel, self).__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.input_size = input_size
|
self.input_size = input_size
|
||||||
self.output_size = output_size
|
self.output_size = output_size
|
||||||
@ -147,7 +147,7 @@ class MonolocoModel(nn.Module):
|
|||||||
|
|
||||||
class MyLinear(nn.Module):
|
class MyLinear(nn.Module):
|
||||||
def __init__(self, linear_size, p_dropout=0.5):
|
def __init__(self, linear_size, p_dropout=0.5):
|
||||||
super(MyLinear, self).__init__()
|
super().__init__()
|
||||||
self.l_size = linear_size
|
self.l_size = linear_size
|
||||||
|
|
||||||
self.relu = nn.ReLU(inplace=True)
|
self.relu = nn.ReLU(inplace=True)
|
||||||
|
|||||||
@ -56,7 +56,7 @@ class Loco:
|
|||||||
output_size=output_size)
|
output_size=output_size)
|
||||||
else:
|
else:
|
||||||
self.model = MonStereoModel(p_dropout=p_dropout, input_size=input_size, output_size=output_size,
|
self.model = MonStereoModel(p_dropout=p_dropout, input_size=input_size, output_size=output_size,
|
||||||
linear_size=linear_size, device=self.device)
|
linear_size=linear_size, device=self.device)
|
||||||
|
|
||||||
self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
|
self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
|
||||||
else:
|
else:
|
||||||
@ -163,7 +163,7 @@ class Loco:
|
|||||||
print("found {} matches with ground-truth".format(len(matches)))
|
print("found {} matches with ground-truth".format(len(matches)))
|
||||||
|
|
||||||
# Keep track of instances non-matched
|
# Keep track of instances non-matched
|
||||||
idxs_matches = (el[0] for el in matches)
|
idxs_matches = [el[0] for el in matches]
|
||||||
not_matches = [idx for idx, _ in enumerate(boxes) if idx not in idxs_matches]
|
not_matches = [idx for idx, _ in enumerate(boxes) if idx not in idxs_matches]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -1,102 +0,0 @@
|
|||||||
|
|
||||||
import glob
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torchvision
|
|
||||||
import torch
|
|
||||||
from PIL import Image, ImageFile
|
|
||||||
from openpifpaf.network import nets
|
|
||||||
from openpifpaf import decoder
|
|
||||||
|
|
||||||
from .process import image_transform
|
|
||||||
|
|
||||||
|
|
||||||
class ImageList(torch.utils.data.Dataset):
|
|
||||||
"""It defines transformations to apply to images and outputs of the dataloader"""
|
|
||||||
def __init__(self, image_paths, scale):
|
|
||||||
self.image_paths = image_paths
|
|
||||||
self.image_paths.sort()
|
|
||||||
self.scale = scale
|
|
||||||
|
|
||||||
def __getitem__(self, index):
|
|
||||||
image_path = self.image_paths[index]
|
|
||||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
|
||||||
with open(image_path, 'rb') as f:
|
|
||||||
image = Image.open(f).convert('RGB')
|
|
||||||
|
|
||||||
if self.scale > 1.01 or self.scale < 0.99:
|
|
||||||
image = torchvision.transforms.functional.resize(image,
|
|
||||||
(round(self.scale * image.size[1]),
|
|
||||||
round(self.scale * image.size[0])),
|
|
||||||
interpolation=Image.BICUBIC)
|
|
||||||
# PIL images are not iterables
|
|
||||||
original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1
|
|
||||||
image = image_transform(image)
|
|
||||||
|
|
||||||
return image_path, original_image, image
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.image_paths)
|
|
||||||
|
|
||||||
|
|
||||||
def factory_from_args(args):
|
|
||||||
|
|
||||||
# Merge the model_pifpaf argument
|
|
||||||
if not args.checkpoint:
|
|
||||||
args.checkpoint = 'resnet152' # Default model Resnet 152
|
|
||||||
# glob
|
|
||||||
if args.glob:
|
|
||||||
args.images += glob.glob(args.glob)
|
|
||||||
if not args.images:
|
|
||||||
raise Exception("no image files given")
|
|
||||||
|
|
||||||
# add args.device
|
|
||||||
args.device = torch.device('cpu')
|
|
||||||
args.pin_memory = False
|
|
||||||
if torch.cuda.is_available():
|
|
||||||
args.device = torch.device('cuda')
|
|
||||||
args.pin_memory = True
|
|
||||||
|
|
||||||
# Add num_workers
|
|
||||||
args.loader_workers = 8
|
|
||||||
|
|
||||||
# Add visualization defaults
|
|
||||||
args.figure_width = 10
|
|
||||||
args.dpi_factor = 1.0
|
|
||||||
|
|
||||||
return args
|
|
||||||
|
|
||||||
|
|
||||||
class PifPaf:
|
|
||||||
def __init__(self, args):
|
|
||||||
"""Instanciate the mdodel"""
|
|
||||||
factory_from_args(args)
|
|
||||||
model_pifpaf, _ = nets.factory_from_args(args)
|
|
||||||
model_pifpaf = model_pifpaf.to(args.device)
|
|
||||||
self.processor = decoder.factory_from_args(args, model_pifpaf)
|
|
||||||
self.keypoints_whole = []
|
|
||||||
|
|
||||||
# Scale the keypoints to the original image size for printing (if not webcam)
|
|
||||||
self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
|
|
||||||
|
|
||||||
def fields(self, processed_images):
|
|
||||||
"""Encoder for pif and paf fields"""
|
|
||||||
fields_batch = self.processor.fields(processed_images)
|
|
||||||
return fields_batch
|
|
||||||
|
|
||||||
def forward(self, image, processed_image_cpu, fields):
|
|
||||||
"""Decoder, from pif and paf fields to keypoints"""
|
|
||||||
self.processor.set_cpu_image(image, processed_image_cpu)
|
|
||||||
keypoint_sets, scores = self.processor.keypoint_sets(fields)
|
|
||||||
|
|
||||||
if keypoint_sets.size > 0:
|
|
||||||
self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1)
|
|
||||||
.reshape(keypoint_sets.shape[0], -1).tolist())
|
|
||||||
|
|
||||||
pifpaf_out = [
|
|
||||||
{'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
|
|
||||||
'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0],
|
|
||||||
np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]}
|
|
||||||
for kps in keypoint_sets
|
|
||||||
]
|
|
||||||
return keypoint_sets, scores, pifpaf_out
|
|
||||||
@ -82,7 +82,7 @@ def factory_for_gt(im_size, name=None, path_gt=None, verbose=True):
|
|||||||
dic_gt = None
|
dic_gt = None
|
||||||
x_factor = im_size[0] / 1600
|
x_factor = im_size[0] / 1600
|
||||||
y_factor = im_size[1] / 900
|
y_factor = im_size[1] / 900
|
||||||
pixel_factor = (x_factor + y_factor) / 2 # 1.7 for MOT
|
pixel_factor = (x_factor + y_factor) / 1.75 # 1.75 for MOT
|
||||||
# pixel_factor = 1
|
# pixel_factor = 1
|
||||||
if im_size[0] / im_size[1] > 2.5:
|
if im_size[0] / im_size[1] > 2.5:
|
||||||
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
|
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
|
||||||
@ -274,7 +274,6 @@ def extract_outputs(outputs, tasks=()):
|
|||||||
|
|
||||||
if outputs.shape[1] == 10:
|
if outputs.shape[1] == 10:
|
||||||
dic_out['aux'] = torch.sigmoid(dic_out['aux'])
|
dic_out['aux'] = torch.sigmoid(dic_out['aux'])
|
||||||
|
|
||||||
return dic_out
|
return dic_out
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -2,144 +2,171 @@
|
|||||||
# pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable
|
# pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import glob
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from PIL import Image
|
import PIL
|
||||||
|
import openpifpaf
|
||||||
|
import openpifpaf.datasets as datasets
|
||||||
|
from openpifpaf.predict import processor_factory, preprocess_factory
|
||||||
|
from openpifpaf import decoder, network, visualizer, show
|
||||||
|
|
||||||
from .visuals.printer import Printer
|
from .visuals.printer import Printer
|
||||||
from .visuals.pifpaf_show import KeypointPainter, image_canvas
|
from .network import Loco
|
||||||
from .network import PifPaf, ImageList, Loco
|
|
||||||
from .network.process import factory_for_gt, preprocess_pifpaf
|
from .network.process import factory_for_gt, preprocess_pifpaf
|
||||||
|
from .activity import show_social
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def factory_from_args(args):
|
||||||
|
|
||||||
|
# Data
|
||||||
|
if args.glob:
|
||||||
|
args.images += glob.glob(args.glob)
|
||||||
|
if not args.images:
|
||||||
|
raise Exception("no image files given")
|
||||||
|
|
||||||
|
# Model
|
||||||
|
if not args.checkpoint:
|
||||||
|
args.checkpoint = 'data/models/shufflenetv2k30-201104-224654-cocokp-d75ed641.pkl' # Default model
|
||||||
|
|
||||||
|
# Devices
|
||||||
|
args.device = torch.device('cpu')
|
||||||
|
args.disable_cuda = False
|
||||||
|
args.pin_memory = False
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
args.device = torch.device('cuda')
|
||||||
|
args.pin_memory = True
|
||||||
|
args.loader_workers = 8
|
||||||
|
|
||||||
|
# Add visualization defaults
|
||||||
|
args.figure_width = 10
|
||||||
|
args.dpi_factor = 1.0
|
||||||
|
|
||||||
|
if args.net == 'monstereo':
|
||||||
|
args.batch_size = 2
|
||||||
|
else:
|
||||||
|
args.batch_size = 1
|
||||||
|
|
||||||
|
# Make default pifpaf argument
|
||||||
|
args.force_complete_pose = True
|
||||||
|
print("Force complete pose is active")
|
||||||
|
|
||||||
|
# Configure
|
||||||
|
decoder.configure(args)
|
||||||
|
network.configure(args)
|
||||||
|
show.configure(args)
|
||||||
|
visualizer.configure(args)
|
||||||
|
|
||||||
|
return args
|
||||||
|
|
||||||
|
|
||||||
def predict(args):
|
def predict(args):
|
||||||
|
|
||||||
cnt = 0
|
cnt = 0
|
||||||
|
args = factory_from_args(args)
|
||||||
|
|
||||||
# Load Models
|
# Load Models
|
||||||
pifpaf = PifPaf(args)
|
assert args.net in ('monoloco_pp', 'monstereo', 'pifpaf')
|
||||||
assert args.mode in ('mono', 'stereo', 'pifpaf')
|
|
||||||
|
|
||||||
if 'mono' in args.mode:
|
if args.net in ('monoloco_pp', 'monstereo'):
|
||||||
monoloco = Loco(model=args.model, net='monoloco_pp',
|
net = Loco(model=args.model, net=args.net, device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
||||||
device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
|
||||||
|
|
||||||
if 'stereo' in args.mode:
|
|
||||||
monstereo = Loco(model=args.model, net='monstereo',
|
|
||||||
device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
|
|
||||||
|
|
||||||
# data
|
# data
|
||||||
data = ImageList(args.images, scale=args.scale)
|
processor, model = processor_factory(args)
|
||||||
if args.mode == 'stereo':
|
preprocess = preprocess_factory(args)
|
||||||
|
|
||||||
|
# data
|
||||||
|
data = datasets.ImageList(args.images, preprocess=preprocess)
|
||||||
|
if args.net == 'monstereo':
|
||||||
assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
|
assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
|
||||||
bs = 2
|
|
||||||
else:
|
|
||||||
bs = 1
|
|
||||||
data_loader = torch.utils.data.DataLoader(
|
data_loader = torch.utils.data.DataLoader(
|
||||||
data, batch_size=bs, shuffle=False,
|
data, batch_size=args.batch_size, shuffle=False,
|
||||||
pin_memory=args.pin_memory, num_workers=args.loader_workers)
|
pin_memory=False, collate_fn=datasets.collate_images_anns_meta)
|
||||||
|
|
||||||
for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
|
# visualizers
|
||||||
images = image_tensors.permute(0, 2, 3, 1)
|
annotation_painter = openpifpaf.show.AnnotationPainter()
|
||||||
|
|
||||||
processed_images = processed_images_cpu.to(args.device, non_blocking=True)
|
for batch_i, (image_tensors_batch, _, meta_batch) in enumerate(data_loader):
|
||||||
fields_batch = pifpaf.fields(processed_images)
|
pred_batch = processor.batch(model, image_tensors_batch, device=args.device)
|
||||||
|
|
||||||
# unbatch stereo pair
|
# unbatch (only for MonStereo)
|
||||||
for ii, (image_path, image, processed_image_cpu, fields) in enumerate(zip(
|
for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
|
||||||
image_paths, images, processed_images_cpu, fields_batch)):
|
LOG.info('batch %d: %s', batch_i, meta['file_name'])
|
||||||
|
pred = preprocess.annotations_inverse(pred, meta)
|
||||||
|
|
||||||
if args.output_directory is None:
|
if args.output_directory is None:
|
||||||
splits = os.path.split(image_paths[0])
|
splits = os.path.split(meta['file_name'])
|
||||||
output_path = os.path.join(splits[0], 'out_' + splits[1])
|
output_path = os.path.join(splits[0], 'out_' + splits[1])
|
||||||
else:
|
else:
|
||||||
file_name = os.path.basename(image_paths[0])
|
file_name = os.path.basename(meta['file_name'])
|
||||||
output_path = os.path.join(args.output_directory, 'out_' + file_name)
|
output_path = os.path.join(args.output_directory, 'out_' + file_name)
|
||||||
print('image', idx, image_path, output_path)
|
print('image', batch_i, meta['file_name'], output_path)
|
||||||
keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
|
pifpaf_out = [ann.json_data() for ann in pred]
|
||||||
|
|
||||||
if ii == 0:
|
if idx == 0:
|
||||||
pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing
|
pifpaf_outputs = pred # to only print left image for stereo
|
||||||
images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor and monoloco original image
|
|
||||||
pifpaf_outs = {'left': pifpaf_out}
|
pifpaf_outs = {'left': pifpaf_out}
|
||||||
image_path_l = image_path
|
with open(meta_batch[0]['file_name'], 'rb') as f:
|
||||||
|
cpu_image = PIL.Image.open(f).convert('RGB')
|
||||||
else:
|
else:
|
||||||
pifpaf_outs['right'] = pifpaf_out
|
pifpaf_outs['right'] = pifpaf_out
|
||||||
|
|
||||||
if args.mode in ('stereo', 'mono'):
|
# 3D Predictions
|
||||||
# Extract calibration matrix and ground truth file if present
|
if args.net in ('monoloco_pp', 'monstereo'):
|
||||||
with open(image_path_l, 'rb') as f:
|
|
||||||
pil_image = Image.open(f).convert('RGB')
|
|
||||||
images_outputs.append(pil_image)
|
|
||||||
|
|
||||||
im_name = os.path.basename(image_path_l)
|
im_name = os.path.basename(meta['file_name'])
|
||||||
im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale)) # Original
|
im_size = (cpu_image.size[0], cpu_image.size[1]) # Original
|
||||||
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
|
kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
|
||||||
|
|
||||||
# Preprocess pifpaf outputs and run monoloco
|
# Preprocess pifpaf outputs and run monoloco
|
||||||
boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False)
|
boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False)
|
||||||
|
|
||||||
if args.mode == 'mono':
|
if args.net == 'monoloco_pp':
|
||||||
print("Prediction with MonoLoco++")
|
print("Prediction with MonoLoco++")
|
||||||
dic_out = monoloco.forward(keypoints, kk)
|
dic_out = net.forward(keypoints, kk)
|
||||||
dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt, reorder=not args.social_distance)
|
||||||
|
|
||||||
|
if args.social_distance:
|
||||||
|
show_social(args, cpu_image, output_path, pifpaf_out, dic_out)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print("Prediction with MonStereo")
|
print("Prediction with MonStereo")
|
||||||
boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
|
boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
|
||||||
dic_out = monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
|
dic_out = net.forward(keypoints, kk, keypoints_r=keypoints_r)
|
||||||
dic_out = monstereo.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
dic_out = net.post_process(dic_out, boxes, keypoints, kk, dic_gt)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
dic_out = defaultdict(list)
|
dic_out = defaultdict(list)
|
||||||
kk = None
|
kk = None
|
||||||
|
|
||||||
factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
|
if not args.social_distance:
|
||||||
|
factory_outputs(args, annotation_painter, cpu_image, output_path, pifpaf_outputs,
|
||||||
|
dic_out=dic_out, kk=kk)
|
||||||
print('Image {}\n'.format(cnt) + '-' * 120)
|
print('Image {}\n'.format(cnt) + '-' * 120)
|
||||||
cnt += 1
|
cnt += 1
|
||||||
|
|
||||||
|
|
||||||
def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
|
def factory_outputs(args, annotation_painter, cpu_image, output_path, pred, dic_out=None, kk=None):
|
||||||
"""Output json files or images according to the choice"""
|
"""Output json files or images according to the choice"""
|
||||||
|
|
||||||
# Save json file
|
# Save json file
|
||||||
if args.mode == 'pifpaf':
|
if args.net == 'pifpaf':
|
||||||
keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
|
with openpifpaf.show.image_canvas(cpu_image, output_path) as ax:
|
||||||
|
annotation_painter.annotations(ax, pred)
|
||||||
|
|
||||||
# Visualizer
|
|
||||||
keypoint_painter = KeypointPainter(show_box=False)
|
|
||||||
skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4)
|
|
||||||
|
|
||||||
if 'json' in args.output_types and keypoint_sets.size > 0:
|
|
||||||
with open(output_path + '.pifpaf.json', 'w') as f:
|
|
||||||
json.dump(pifpaf_out, f)
|
|
||||||
|
|
||||||
if 'keypoints' in args.output_types:
|
|
||||||
with image_canvas(images_outputs[0],
|
|
||||||
output_path + '.keypoints.png',
|
|
||||||
show=args.show,
|
|
||||||
fig_width=args.figure_width,
|
|
||||||
dpi_factor=args.dpi_factor) as ax:
|
|
||||||
keypoint_painter.keypoints(ax, keypoint_sets)
|
|
||||||
|
|
||||||
if 'skeleton' in args.output_types:
|
|
||||||
with image_canvas(images_outputs[0],
|
|
||||||
output_path + '.skeleton.png',
|
|
||||||
show=args.show,
|
|
||||||
fig_width=args.figure_width,
|
|
||||||
dpi_factor=args.dpi_factor) as ax:
|
|
||||||
skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
|
|
||||||
|
|
||||||
else:
|
|
||||||
if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
|
if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
|
||||||
print(output_path)
|
print(output_path)
|
||||||
if dic_out['boxes']: # Only print in case of detections
|
if dic_out['boxes']: # Only print in case of detections
|
||||||
printer = Printer(images_outputs[1], output_path, kk, args)
|
printer = Printer(cpu_image, output_path, kk, args)
|
||||||
figures, axes = printer.factory_axes()
|
figures, axes = printer.factory_axes(dic_out)
|
||||||
printer.draw(figures, axes, dic_out, images_outputs[1])
|
printer.draw(figures, axes, cpu_image)
|
||||||
|
|
||||||
if 'json' in args.output_types:
|
if 'json' in args.output_types:
|
||||||
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
|
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
|
||||||
|
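For reference, a condensed sketch of the OpenPifPaf 0.12 prediction loop adopted above; it only uses calls that appear in this file and assumes `args` is the namespace produced by factory_from_args:

import torch
import openpifpaf.datasets as datasets
from openpifpaf.predict import processor_factory, preprocess_factory

def pifpaf_predictions(args):
    """Yield (meta, json annotations) per image; `args` is the namespace built by factory_from_args."""
    processor, model = processor_factory(args)
    preprocess = preprocess_factory(args)
    data = datasets.ImageList(args.images, preprocess=preprocess)
    loader = torch.utils.data.DataLoader(
        data, batch_size=args.batch_size, shuffle=False,
        pin_memory=False, collate_fn=datasets.collate_images_anns_meta)
    for image_tensors_batch, _, meta_batch in loader:
        pred_batch = processor.batch(model, image_tensors_batch, device=args.device)
        for pred, meta in zip(pred_batch, meta_batch):
            pred = preprocess.annotations_inverse(pred, meta)  # back to original image coordinates
            yield meta, [ann.json_data() for ann in pred]      # same dict format as the .json output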
|||||||
@ -24,10 +24,9 @@ from .transforms import flip_inputs, flip_labels, height_augmentation
|
|||||||
class PreprocessKitti:
|
class PreprocessKitti:
|
||||||
"""Prepare arrays with same format as nuScenes preprocessing but using ground truth txt files"""
|
"""Prepare arrays with same format as nuScenes preprocessing but using ground truth txt files"""
|
||||||
|
|
||||||
# AV_W = 0.68
|
dir_gt = os.path.join('data', 'kitti', 'gt')
|
||||||
# AV_L = 0.75
|
dir_images = '/data/lorenzo-data/kitti/original_images/training/image_2'
|
||||||
# AV_H = 1.72
|
dir_byc_l = '/data/lorenzo-data/kitti/object_detection/left'
|
||||||
# WLH_STD = 0.1
|
|
||||||
|
|
||||||
# SOCIAL DISTANCING PARAMETERS
|
# SOCIAL DISTANCING PARAMETERS
|
||||||
THRESHOLD_DIST = 2 # Threshold to check distance of people
|
THRESHOLD_DIST = 2 # Threshold to check distance of people
|
||||||
@ -51,9 +50,6 @@ class PreprocessKitti:
|
|||||||
self.dir_ann = dir_ann
|
self.dir_ann = dir_ann
|
||||||
self.iou_min = iou_min
|
self.iou_min = iou_min
|
||||||
self.monocular = monocular
|
self.monocular = monocular
|
||||||
self.dir_gt = os.path.join('data', 'kitti', 'gt')
|
|
||||||
self.dir_images = '/data/lorenzo-data/kitti/original_images/training/image_2'
|
|
||||||
self.dir_byc_l = '/data/lorenzo-data/kitti/object_detection/left'
|
|
||||||
self.names_gt = tuple(os.listdir(self.dir_gt))
|
self.names_gt = tuple(os.listdir(self.dir_gt))
|
||||||
self.dir_kk = os.path.join('data', 'kitti', 'calib')
|
self.dir_kk = os.path.join('data', 'kitti', 'calib')
|
||||||
self.list_gt = glob.glob(self.dir_gt + '/*.txt')
|
self.list_gt = glob.glob(self.dir_gt + '/*.txt')
|
||||||
@ -97,7 +93,9 @@ class PreprocessKitti:
|
|||||||
category = 'pedestrian'
|
category = 'pedestrian'
|
||||||
|
|
||||||
# Extract ground truth
|
# Extract ground truth
|
||||||
boxes_gt, ys, _, _ = parse_ground_truth(path_gt, category=category, spherical=True)
|
boxes_gt, ys, _, _ = parse_ground_truth(path_gt, # pylint: disable=unbalanced-tuple-unpacking
|
||||||
|
category=category,
|
||||||
|
spherical=True)
|
||||||
cnt_gt[phase] += len(boxes_gt)
|
cnt_gt[phase] += len(boxes_gt)
|
||||||
cnt_files += 1
|
cnt_files += 1
|
||||||
cnt_files_ped += min(len(boxes_gt), 1) # if no boxes 0 else 1
|
cnt_files_ped += min(len(boxes_gt), 1) # if no boxes 0 else 1
|
||||||
@ -170,7 +168,7 @@ class PreprocessKitti:
|
|||||||
self.dic_jo[phase]['X'].append(inp)
|
self.dic_jo[phase]['X'].append(inp)
|
||||||
self.dic_jo[phase]['Y'].append(lab)
|
self.dic_jo[phase]['Y'].append(lab)
|
||||||
self.dic_jo[phase]['names'].append(name) # One image name for each annotation
|
self.dic_jo[phase]['names'].append(name) # One image name for each annotation
|
||||||
append_cluster(self.dic_jo, phase, inp, lab, keypoint)
|
append_cluster(self.dic_jo, phase, inp, lab, keypoint.tolist())
|
||||||
cnt_mono[phase] += 1
|
cnt_mono[phase] += 1
|
||||||
cnt_tot += 1
|
cnt_tot += 1
|
||||||
|
|
||||||
|
|||||||
@ -87,7 +87,7 @@ class PreprocessNuscenes:
|
|||||||
while not current_token == "":
|
while not current_token == "":
|
||||||
sample_dic = self.nusc.get('sample', current_token)
|
sample_dic = self.nusc.get('sample', current_token)
|
||||||
cnt_samples += 1
|
cnt_samples += 1
|
||||||
|
# if (cnt_samples % 4 == 0) and (cnt_ann < 3000):
|
||||||
# Extract all the sample_data tokens for each sample
|
# Extract all the sample_data tokens for each sample
|
||||||
for cam in self.CAMERAS:
|
for cam in self.CAMERAS:
|
||||||
sd_token = sample_dic['data'][cam]
|
sd_token = sample_dic['data'][cam]
|
||||||
@ -105,7 +105,7 @@ class PreprocessNuscenes:
|
|||||||
self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)
|
self.dic_names[basename + '.jpg']['K'] = copy.deepcopy(kk)
|
||||||
|
|
||||||
# Run IoU with pifpaf detections and save
|
# Run IoU with pifpaf detections and save
|
||||||
path_pif = os.path.join(self.dir_ann, name + '.pifpaf.json')
|
path_pif = os.path.join(self.dir_ann, name + '.predictions.json')
|
||||||
exists = os.path.isfile(path_pif)
|
exists = os.path.isfile(path_pif)
|
||||||
|
|
||||||
if exists:
|
if exists:
|
||||||
@ -114,7 +114,6 @@ class PreprocessNuscenes:
|
|||||||
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
|
boxes, keypoints = preprocess_pifpaf(annotations, im_size=(1600, 900))
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if keypoints:
|
if keypoints:
|
||||||
matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
|
matches = get_iou_matches(boxes, boxes_gt, self.iou_min)
|
||||||
for (idx, idx_gt) in matches:
|
for (idx, idx_gt) in matches:
|
||||||
@ -130,7 +129,6 @@ class PreprocessNuscenes:
|
|||||||
append_cluster(self.dic_jo, phase, inp, lab, keypoint)
|
append_cluster(self.dic_jo, phase, inp, lab, keypoint)
|
||||||
cnt_ann += 1
|
cnt_ann += 1
|
||||||
sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')
|
sys.stdout.write('\r' + 'Saved annotations {}'.format(cnt_ann) + '\t')
|
||||||
|
|
||||||
current_token = sample_dic['next']
|
current_token = sample_dic['next']
|
||||||
|
|
||||||
with open(os.path.join(self.path_joints), 'w') as f:
|
with open(os.path.join(self.path_joints), 'w') as f:
|
||||||
@ -139,7 +137,7 @@ class PreprocessNuscenes:
|
|||||||
json.dump(self.dic_names, f)
|
json.dump(self.dic_names, f)
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
|
||||||
extract_box_average(self.dic_jo['train']['boxes_3d'])
|
# extract_box_average(self.dic_jo['train']['boxes_3d'])
|
||||||
print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
|
print("\nSaved {} annotations for {} samples in {} scenes. Total time: {:.1f} minutes"
|
||||||
.format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60))
|
.format(cnt_ann, cnt_samples, cnt_scenes, (end-start)/60))
|
||||||
print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))
|
print("\nOutput files:\n{}\n{}\n".format(self.path_names, self.path_joints))
|
||||||
|
|||||||
@ -2,8 +2,7 @@
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from openpifpaf.network import nets
|
from openpifpaf import decoder, network, visualizer, show
|
||||||
from openpifpaf import decoder
|
|
||||||
|
|
||||||
|
|
||||||
def cli():
|
def cli():
|
||||||
@ -37,15 +36,18 @@ def cli():
|
|||||||
help='what to output: json keypoints skeleton for Pifpaf'
|
help='what to output: json keypoints skeleton for Pifpaf'
|
||||||
'json bird front or multi for MonStereo')
|
'json bird front or multi for MonStereo')
|
||||||
predict_parser.add_argument('--no_save', help='to show images', action='store_true')
|
predict_parser.add_argument('--no_save', help='to show images', action='store_true')
|
||||||
predict_parser.add_argument('--show', help='to show images', action='store_true')
|
predict_parser.add_argument('--dpi', help='image resolution', type=int, default=150)
|
||||||
predict_parser.add_argument('--dpi', help='image resolution', type=int, default=100)
|
predict_parser.add_argument('--long-edge', default=None, type=int,
|
||||||
|
help='rescale the long side of the image (aspect ratio maintained)')
|
||||||
|
|
||||||
# Pifpaf
|
# Pifpaf parsers
|
||||||
nets.cli(predict_parser)
|
decoder.cli(predict_parser)
|
||||||
decoder.cli(predict_parser, force_complete_pose=True, instance_threshold=0.15)
|
network.cli(predict_parser)
|
||||||
predict_parser.add_argument('--scale', default=1.0, type=float, help='change the scale of the image to preprocess')
|
show.cli(predict_parser)
|
||||||
|
visualizer.cli(predict_parser)
|
||||||
|
|
||||||
# Monoloco
|
# Monoloco
|
||||||
|
predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
|
||||||
predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
|
predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
|
||||||
predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
|
predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
|
||||||
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
|
predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',
|
||||||
@ -57,18 +59,15 @@ def cli():
|
|||||||
predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
|
predict_parser.add_argument('--show_all', help='only predict ground-truth matches or all', action='store_true')
|
||||||
|
|
||||||
# Social distancing and social interactions
|
# Social distancing and social interactions
|
||||||
predict_parser.add_argument('--social', help='social', action='store_true')
|
predict_parser.add_argument('--social_distance', help='social', action='store_true')
|
||||||
predict_parser.add_argument('--activity', help='activity', action='store_true')
|
|
||||||
predict_parser.add_argument('--json_dir', help='for social')
|
|
||||||
predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
|
predict_parser.add_argument('--threshold_prob', type=float, help='concordance for samples', default=0.25)
|
||||||
predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2)
|
predict_parser.add_argument('--threshold_dist', type=float, help='min distance of people', default=2.5)
|
||||||
predict_parser.add_argument('--margin', type=float, help='conservative for noise in orientation', default=1.5)
|
predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.3, 0.5, 1))
|
||||||
predict_parser.add_argument('--radii', type=tuple, help='o-space radii', default=(0.25, 1, 2))
|
|
||||||
|
|
||||||
# Training
|
# Training
|
||||||
training_parser.add_argument('--joints', help='Json file with input joints',
|
training_parser.add_argument('--joints', help='Json file with input joints',
|
||||||
default='data/arrays/joints-nuscenes_teaser-190513-1846.json')
|
default='data/arrays/joints-nuscenes_teaser-190513-1846.json')
|
||||||
training_parser.add_argument('--save', help='whether to not save model and log file', action='store_true')
|
training_parser.add_argument('--no_save', help='to not save model and log file', action='store_true')
|
||||||
training_parser.add_argument('-e', '--epochs', type=int, help='number of epochs to train for', default=500)
|
training_parser.add_argument('-e', '--epochs', type=int, help='number of epochs to train for', default=500)
|
||||||
training_parser.add_argument('--bs', type=int, default=512, help='input batch size')
|
training_parser.add_argument('--bs', type=int, default=512, help='input batch size')
|
||||||
training_parser.add_argument('--monocular', help='whether to train monoloco', action='store_true')
|
training_parser.add_argument('--monocular', help='whether to train monoloco', action='store_true')
|
||||||
@ -81,7 +80,9 @@ def cli():
|
|||||||
training_parser.add_argument('--hyp', help='run hyperparameters tuning', action='store_true')
|
training_parser.add_argument('--hyp', help='run hyperparameters tuning', action='store_true')
|
||||||
training_parser.add_argument('--multiplier', type=int, help='Size of the grid of hyp search', default=1)
|
training_parser.add_argument('--multiplier', type=int, help='Size of the grid of hyp search', default=1)
|
||||||
training_parser.add_argument('--r_seed', type=int, help='specify the seed for training and hyp tuning', default=1)
|
training_parser.add_argument('--r_seed', type=int, help='specify the seed for training and hyp tuning', default=1)
|
||||||
training_parser.add_argument('--activity', help='new', action='store_true')
|
training_parser.add_argument('--print_loss', help='print training and validation losses', action='store_true')
|
||||||
|
training_parser.add_argument('--auto_tune_mtl', help='whether to use uncertainty to autotune losses',
|
||||||
|
action='store_true')
|
||||||
|
|
||||||
# Evaluation
|
# Evaluation
|
||||||
eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti')
|
eval_parser.add_argument('--dataset', help='datasets to evaluate, kitti or nuscenes', default='kitti')
|
||||||
@@ -102,6 +103,9 @@ def cli():
    eval_parser.add_argument('--variance', help='evaluate keypoints variance', action='store_true')
    eval_parser.add_argument('--activity', help='evaluate activities', action='store_true')
    eval_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
+    eval_parser.add_argument('--baselines', help='whether to evaluate stereo baselines', action='store_true')
+    eval_parser.add_argument('--generate_official', help='whether to add empty txt files for official evaluation',
+                             action='store_true')

    args = parser.parse_args()
    return args
@@ -110,10 +114,7 @@ def cli():
def main():
    args = cli()
    if args.command == 'predict':
-        if args.activity:
-            from .activity import predict
-        else:
-            from .predict import predict
+        from .predict import predict
        predict(args)

    elif args.command == 'prep':
@@ -135,14 +136,11 @@ def main():
            hyp_tuning = HypTuning(joints=args.joints, epochs=args.epochs,
                                   monocular=args.monocular, dropout=args.dropout,
                                   multiplier=args.multiplier, r_seed=args.r_seed)
-            hyp_tuning.train()
+            hyp_tuning.train(args)
        else:

            from .train import Trainer
-            training = Trainer(joints=args.joints, epochs=args.epochs, bs=args.bs,
-                               monocular=args.monocular, dropout=args.dropout, lr=args.lr, sched_step=args.sched_step,
-                               n_stage=args.n_stage, sched_gamma=args.sched_gamma, hidden_size=args.hidden_size,
-                               r_seed=args.r_seed, save=args.save)
+            training = Trainer(args)

            _ = training.train()
            _ = training.evaluate()
@@ -169,19 +167,18 @@ def main():
        else:
            if args.generate:
                from .eval.generate_kitti import GenerateKitti
-                kitti_txt = GenerateKitti(args.model, args.dir_ann, p_dropout=args.dropout, n_dropout=args.n_dropout,
-                                          hidden_size=args.hidden_size)
+                kitti_txt = GenerateKitti(args)
                kitti_txt.run()

            if args.dataset == 'kitti':
                from .eval import EvalKitti
-                kitti_eval = EvalKitti(verbose=args.verbose)
+                kitti_eval = EvalKitti(args)
                kitti_eval.run()
-                kitti_eval.printer(show=args.show, save=args.save)
+                kitti_eval.printer()

            elif 'nuscenes' in args.dataset:
                from .train import Trainer
-                training = Trainer(joints=args.joints, hidden_size=args.hidden_size)
+                training = Trainer(args)
                _ = training.evaluate(load=True, model=args.model, debug=False)

            else:
@@ -61,7 +61,7 @@ class HypTuning:
        # plt.hist(self.lr_list, bins=50)
        # plt.show()

-    def train(self):
+    def train(self, args):
        """Train multiple times using log-space random search"""

        best_acc_val = 20
@@ -76,10 +76,7 @@ class HypTuning:
            hidden_size = self.hidden_list[idx]
            n_stage = self.n_stage_list[idx]

-            training = Trainer(joints=self.joints, epochs=self.num_epochs,
-                               bs=bs, monocular=self.monocular, dropout=self.dropout, lr=lr, sched_step=sched_step,
-                               sched_gamma=sched_gamma, hidden_size=hidden_size, n_stage=n_stage,
-                               save=False, print_loss=False, r_seed=self.r_seed)
+            training = Trainer(args)

            best_epoch = training.train()
            dic_err, model = training.evaluate()
@@ -27,7 +27,7 @@ class AutoTuneMultiTaskLoss(torch.nn.Module):
        loss_values = [lam * l(o, g) / (2.0 * (log_sigma.exp() ** 2))
                       for lam, log_sigma, l, o, g in zip(self.lambdas, self.log_sigmas, self.losses, out, gt_out)]

-        auto_reg = [log_sigma for log_sigma in self.log_sigmas]
+        auto_reg = [log_sigma for log_sigma in self.log_sigmas]  # pylint: disable=unnecessary-comprehension

        loss = sum(loss_values) + sum(auto_reg)
        if phase == 'val':
@@ -70,7 +70,7 @@ class MultiTaskLoss(torch.nn.Module):
class CompositeLoss(torch.nn.Module):

    def __init__(self, tasks):
-        super(CompositeLoss, self).__init__()
+        super().__init__()

        self.tasks = tasks
        self.multi_loss_tr = {task: (LaplacianLoss() if task == 'd'
@@ -98,7 +98,7 @@ class CompositeLoss(torch.nn.Module):
class LaplacianLoss(torch.nn.Module):
    """1D Gaussian with std depending on the absolute distance"""
    def __init__(self, size_average=True, reduce=True, evaluate=False):
-        super(LaplacianLoss, self).__init__()
+        super().__init__()
        self.size_average = size_average
        self.reduce = reduce
        self.evaluate = evaluate
@@ -140,7 +140,7 @@ class GaussianLoss(torch.nn.Module):
    """1D Gaussian with std depending on the absolute distance
    """
    def __init__(self, device, size_average=True, reduce=True, evaluate=False):
-        super(GaussianLoss, self).__init__()
+        super().__init__()
        self.size_average = size_average
        self.reduce = reduce
        self.evaluate = evaluate
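For reference, the weighting implemented by the `AutoTuneMultiTaskLoss` lines above, with learned log-scales $s_i$ (the `log_sigmas`), task weights $\lambda_i$ and task losses $\mathcal{L}_i$, corresponds to the usual uncertainty-based auto-tuning form:

$$
\mathcal{L} \;=\; \sum_i \frac{\lambda_i\,\mathcal{L}_i}{2\,e^{2 s_i}} \;+\; \sum_i s_i
$$

Each task is down-weighted by its learned variance, while the $\sum_i s_i$ regularizer keeps the variances from growing unbounded.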
@@ -34,10 +34,9 @@ class Trainer:
    tasks = ('d', 'x', 'y', 'h', 'w', 'l', 'ori', 'aux')
    val_task = 'd'
    lambdas = (1, 1, 1, 1, 1, 1, 1, 1)
+    clusters = ['10', '20', '30', '40']

-    def __init__(self, joints, epochs=100, bs=256, dropout=0.2, lr=0.002,
-                 sched_step=20, sched_gamma=1, hidden_size=256, n_stage=3, r_seed=1, n_samples=100,
-                 monocular=False, save=False, print_loss=True):
+    def __init__(self, args):
        """
        Initialize directories, load the data and parameters for the training
        """
@@ -49,31 +48,29 @@ class Trainer:
        dir_logs = os.path.join('data', 'logs')
        if not os.path.exists(dir_logs):
            warnings.warn("Warning: default logs directory not found")
-        assert os.path.exists(joints), "Input file not found"
+        assert os.path.exists(args.joints), "Input file not found"

-        self.joints = joints
-        self.num_epochs = epochs
-        self.save = save
-        self.print_loss = print_loss
-        self.monocular = monocular
-        self.lr = lr
-        self.sched_step = sched_step
-        self.sched_gamma = sched_gamma
-        self.clusters = ['10', '20', '30', '50', '>50']
-        self.hidden_size = hidden_size
-        self.n_stage = n_stage
+        self.joints = args.joints
+        self.num_epochs = args.epochs
+        self.no_save = args.no_save
+        self.print_loss = args.print_loss
+        self.monocular = args.monocular
+        self.lr = args.lr
+        self.sched_step = args.sched_step
+        self.sched_gamma = args.sched_gamma
+        self.hidden_size = args.hidden_size
+        self.n_stage = args.n_stage
        self.dir_out = dir_out
-        self.n_samples = n_samples
-        self.r_seed = r_seed
-        self.auto_tune_mtl = False
+        self.r_seed = args.r_seed
+        self.auto_tune_mtl = args.auto_tune_mtl

        # Select the device
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        print('Device: ', self.device)
-        torch.manual_seed(r_seed)
+        torch.manual_seed(self.r_seed)
        if use_cuda:
-            torch.cuda.manual_seed(r_seed)
+            torch.cuda.manual_seed(self.r_seed)

        # Remove auxiliary task if monocular
        if self.monocular and self.tasks[-1] == 'aux':
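The constructor now takes a single argparse `Namespace` instead of a long keyword list. A minimal, self-contained sketch of this pattern (a hypothetical class with a small attribute subset, not the repository's `Trainer`):

```python
import argparse


class TrainerSketch:
    """Toy stand-in illustrating the Namespace-driven constructor."""

    def __init__(self, args):
        self.joints = args.joints        # path to the json file with input joints
        self.num_epochs = args.epochs
        self.no_save = args.no_save
        self.lr = args.lr


# In the real code the Namespace comes from cli(); here it is built by hand.
args = argparse.Namespace(joints='data/arrays/joints.json', epochs=500, no_save=True, lr=0.002)
trainer = TrainerSketch(args)
print(trainer.num_epochs, trainer.lr)   # 500 0.002
```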
@@ -95,25 +92,28 @@ class Trainer:
        input_size = 34
        output_size = 9

+        name = 'monoloco_pp' if self.monocular else 'monstereo'
        now = datetime.datetime.now()
        now_time = now.strftime("%Y%m%d-%H%M")[2:]
-        name_out = 'monstereo-' + now_time
-        if self.save:
+        name_out = name + '-' + now_time
+        if not self.no_save:
            self.path_model = os.path.join(dir_out, name_out + '.pkl')
            self.logger = set_logger(os.path.join(dir_logs, name_out))
            self.logger.info("Training arguments: \nepochs: {} \nbatch_size: {} \ndropout: {}"
                             "\nmonocular: {} \nlearning rate: {} \nscheduler step: {} \nscheduler gamma: {} "
                             "\ninput_size: {} \noutput_size: {}\nhidden_size: {} \nn_stages: {} "
                             "\nr_seed: {} \nlambdas: {} \ninput_file: {}"
-                             .format(epochs, bs, dropout, self.monocular, lr, sched_step, sched_gamma, input_size,
-                                     output_size, hidden_size, n_stage, r_seed, self.lambdas, self.joints))
+                             .format(args.epochs, args.bs, args.dropout, self.monocular,
+                                     args.lr, args.sched_step, args.sched_gamma, input_size,
+                                     output_size, args.hidden_size, args.n_stage, args.r_seed,
+                                     self.lambdas, self.joints))
        else:
            logging.basicConfig(level=logging.INFO)
            self.logger = logging.getLogger(__name__)

        # Dataloader
        self.dataloaders = {phase: DataLoader(KeypointsDataset(self.joints, phase=phase),
-                                              batch_size=bs, shuffle=True) for phase in ['train', 'val']}
+                                              batch_size=args.bs, shuffle=True) for phase in ['train', 'val']}

        self.dataset_sizes = {phase: len(KeypointsDataset(self.joints, phase=phase))
                              for phase in ['train', 'val']}
@@ -122,15 +122,16 @@ class Trainer:
        self.logger.info('Sizes of the dataset: {}'.format(self.dataset_sizes))
        print(">>> creating model")

-        self.model = MonStereoModel(input_size=input_size, output_size=output_size, linear_size=hidden_size,
-                                    p_dropout=dropout, num_stage=self.n_stage, device=self.device)
+        self.model = MonStereoModel(input_size=input_size, output_size=output_size, linear_size=args.hidden_size,
+                                    p_dropout=args.dropout, num_stage=self.n_stage, device=self.device)
        self.model.to(self.device)
        print(">>> model params: {:.3f}M".format(sum(p.numel() for p in self.model.parameters()) / 1000000.0))
        print(">>> loss params: {}".format(sum(p.numel() for p in self.mt_loss.parameters())))

        # Optimizer and scheduler
        all_params = chain(self.model.parameters(), self.mt_loss.parameters())
-        self.optimizer = torch.optim.Adam(params=all_params, lr=lr)
+        self.optimizer = torch.optim.Adam(params=all_params, lr=args.lr)
+        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')
        self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=self.sched_step, gamma=self.sched_gamma)

    def train(self):
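For reference, the `StepLR` schedule bound by the final assignment above decays the learning rate in discrete steps: with initial rate $\eta_0$ (`args.lr`), step size $s$ (`sched_step`) and factor $\gamma$ (`sched_gamma`), the rate at epoch $t$ is

$$
\eta_t = \eta_0\,\gamma^{\lfloor t/s \rfloor}.
$$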
@@ -155,11 +156,11 @@ class Trainer:
                labels = labels.to(self.device)
                with torch.set_grad_enabled(phase == 'train'):
                    if phase == 'train':
+                        self.optimizer.zero_grad()
                        outputs = self.model(inputs)
                        loss, loss_values = self.mt_loss(outputs, labels, phase=phase)
-                        self.optimizer.zero_grad()
                        loss.backward()
-                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 2)
+                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 3)
                        self.optimizer.step()
                        self.scheduler.step()

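A self-contained sketch of the step order used in the loop above, with gradients cleared before the forward pass and the new max-norm of 3 for clipping; the tiny model and random data are placeholders:

```python
import torch
from torch import nn
from torch.optim import lr_scheduler

model = nn.Linear(34, 9)   # placeholder sizes echoing input_size/output_size above
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=1)

inputs, labels = torch.randn(8, 34), torch.randn(8, 9)
optimizer.zero_grad()                                   # cleared before the forward pass
outputs = model(inputs)
loss = nn.functional.mse_loss(outputs, labels)          # stand-in for the multi-task loss
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 3)   # clipping threshold used above
optimizer.step()
scheduler.step()
```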
@@ -242,7 +243,7 @@ class Trainer:
                self.cout_stats(dic_err['val'], size_eval, clst=clst)

        # Save the model and the results
-        if self.save and not load:
+        if not (self.no_save or load):
            torch.save(self.model.state_dict(), self.path_model)
            print('-' * 120)
            self.logger.info("\nmodel saved: {} \n".format(self.path_model))
@@ -264,7 +265,6 @@ class Trainer:

        # Distance
        errs = torch.abs(extract_outputs(outputs)['d'] - extract_labels(labels)['d'])

        assert rel_frac > 0.99, "Variance of errors not supported with partial evaluation"

        # Uncertainty
@@ -57,7 +57,7 @@ def get_iou_matches(boxes, boxes_gt, iou_min=0.3):
            ious.append(iou)
        idx_gt_max = int(np.argmax(ious))
        if (ious[idx_gt_max] >= iou_min) and (idx_gt_max not in used):
-            matches.append((idx, idx_gt_max))
+            matches.append((int(idx), idx_gt_max))
            used.append(idx_gt_max)
    return matches

@@ -93,6 +93,6 @@ def reorder_matches(matches, boxes, mode='left_rigth'):

    # Order the boxes based on the left-right position in the image and
    ordered_boxes = np.argsort([box[0] for box in boxes])  # indices of boxes ordered from left to right
-    matches_left = [idx for (idx, _) in matches]
+    matches_left = [int(idx) for (idx, _) in matches]

    return [matches[matches_left.index(idx_boxes)] for idx_boxes in ordered_boxes if idx_boxes in matches_left]
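The explicit `int()` casts turn NumPy integer indices into plain Python ints. One plausible motivation (an assumption, not stated in the patch) is that NumPy integers are not JSON-serializable, so indices written to annotation files need built-in types:

```python
import json
import numpy as np

idx = np.argmax(np.array([0.1, 0.7, 0.2]))    # a NumPy integer, not a built-in int
print(type(idx))
# json.dumps({'idx': idx}) raises TypeError; casting first avoids it
print(json.dumps({'idx': int(idx)}))          # {"idx": 1}
```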
@@ -199,11 +199,11 @@ def factory_file(path_calib, dir_ann, basename, mode='left'):

    if mode == 'left':
        kk, tt = p_left[:]
-        path_ann = os.path.join(dir_ann, basename + '.png.pifpaf.json')
+        path_ann = os.path.join(dir_ann, basename + '.png.predictions.json')

    else:
        kk, tt = p_right[:]
-        path_ann = os.path.join(dir_ann + '_right', basename + '.png.pifpaf.json')
+        path_ann = os.path.join(dir_ann + '_right', basename + '.png.predictions.json')

    from ..utils import open_annotations
    annotations = open_annotations(path_ann)
@@ -20,14 +20,14 @@ def append_cluster(dic_jo, phase, xx, ys, kps):
        dic_jo[phase]['clst']['30']['kps'].append(kps)
        dic_jo[phase]['clst']['30']['X'].append(xx)
        dic_jo[phase]['clst']['30']['Y'].append(ys)
-    elif ys[3] < 50:
-        dic_jo[phase]['clst']['50']['kps'].append(kps)
-        dic_jo[phase]['clst']['50']['X'].append(xx)
-        dic_jo[phase]['clst']['50']['Y'].append(ys)
+    elif ys[3] <= 40:
+        dic_jo[phase]['clst']['40']['kps'].append(kps)
+        dic_jo[phase]['clst']['40']['X'].append(xx)
+        dic_jo[phase]['clst']['40']['Y'].append(ys)
    else:
-        dic_jo[phase]['clst']['>50']['kps'].append(kps)
-        dic_jo[phase]['clst']['>50']['X'].append(xx)
-        dic_jo[phase]['clst']['>50']['Y'].append(ys)
+        dic_jo[phase]['clst']['>40']['kps'].append(kps)
+        dic_jo[phase]['clst']['>40']['X'].append(xx)
+        dic_jo[phase]['clst']['>40']['Y'].append(ys)


def get_task_error(dd):
@@ -58,7 +58,7 @@ def make_new_directory(dir_out):
    if os.path.exists(dir_out):
        shutil.rmtree(dir_out)
    os.makedirs(dir_out)
-    print("Created empty output directory for {} txt files".format(dir_out))
+    print("Created empty output directory {} ".format(dir_out))


def normalize_hwl(lab):
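The clusters now stop at 40 m, matching `clusters = ['10', '20', '30', '40']` added to the `Trainer` earlier in this diff. A standalone sketch of the bucketing these thresholds imply (the helper name and the comparison operators for the lower buckets are assumptions):

```python
def distance_cluster(depth):
    """Map a ground-truth depth in metres to the cluster keys used above."""
    for bound in (10, 20, 30, 40):
        if depth <= bound:
            return str(bound)
    return '>40'


print([distance_cluster(d) for d in (5.0, 25.0, 39.9, 55.0)])   # ['10', '30', '40', '>40']
```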
@@ -17,21 +17,22 @@ DPI = 200
GRID_WIDTH = 0.5


-def show_results(dic_stats, clusters, dir_out='data/figures', show=False, save=False, stereo=True):
+def show_results(dic_stats, clusters, net, dir_fig, show=False, save=False):
    """
    Visualize error as function of the distance and compare it with target errors based on human height analyses
    """

    phase = 'test'
    x_min = 3
-    x_max = 42
+    # x_max = 42
+    x_max = 31
    y_min = 0
    # y_max = 2.2
-    y_max = 3.5 if stereo else 5.2
+    y_max = 3.5 if net == 'monstereo' else 2.7
    xx = np.linspace(x_min, x_max, 100)
-    excl_clusters = ['all', 'easy', 'moderate', 'hard']
+    excl_clusters = ['all', 'easy', 'moderate', 'hard', '49']
    clusters = [clst for clst in clusters if clst not in excl_clusters]
-    styles = printing_styles(stereo)
+    styles = printing_styles(net)
    for idx_style, style in enumerate(styles.items()):
        plt.figure(idx_style, figsize=FIGSIZE)
        plt.grid(linewidth=GRID_WIDTH)
@@ -48,10 +49,10 @@ def show_results(dic_stats, clusters, dir_out='data/figures', show=False, save=F
            plt.plot(xxs, errs, marker=styles['mks'][idx], markersize=styles['mksizes'][idx],
                     linewidth=styles['lws'][idx],
                     label=styles['labels'][idx], linestyle=styles['lstyles'][idx], color=styles['colors'][idx])
-            if method in ('monstereo', 'pseudo-lidar'):
+            if method in ('monstereo', 'monoloco_pp', 'pseudo-lidar'):
                for i, x in enumerate(xxs):
-                    plt.text(x, errs[i], str(cnts[i]), fontsize=FONTSIZE)
-        if not stereo:
+                    plt.text(x, errs[i] - 0.1, str(cnts[i]), fontsize=FONTSIZE)
+        if net == 'monoloco_pp':
            plt.plot(xx, get_task_error(xx), '--', label="Task error", color='lightgreen', linewidth=2.5)
        # if stereo:
        #     yy_stereo = get_pixel_error(xx)
@@ -62,61 +63,61 @@ def show_results(dic_stats, clusters, dir_out='data/figures', show=False, save=F
    plt.yticks(fontsize=FONTSIZE)
    if save:
        plt.tight_layout()
-        mode = 'stereo' if stereo else 'mono'
-        path_fig = os.path.join(dir_out, 'results_' + mode + '.png')
+        path_fig = os.path.join(dir_fig, 'results_' + net + '.png')
        plt.savefig(path_fig, dpi=DPI)
-        print("Figure of results " + mode + " saved in {}".format(path_fig))
+        print("Figure of results " + net + " saved in {}".format(path_fig))
    if show:
        plt.show()
    plt.close('all')


-def show_spread(dic_stats, clusters, dir_out='data/figures', show=False, save=False):
+def show_spread(dic_stats, clusters, net, dir_fig, show=False, save=False):
    """Predicted confidence intervals and task error as a function of ground-truth distance"""

+    assert net in ('monoloco_pp', 'monstereo'), "network not recognized"
    phase = 'test'
-    excl_clusters = ['all', 'easy', 'moderate', 'hard']
+    excl_clusters = ['all', 'easy', 'moderate', 'hard', '49']
    clusters = [clst for clst in clusters if clst not in excl_clusters]
    x_min = 3
-    x_max = 42
+    x_max = 31
    y_min = 0

-    for method in ('monoloco_pp', 'monstereo'):
-        plt.figure(2, figsize=FIGSIZE)
-        xxs = get_distances(clusters)
-        bbs = np.array([dic_stats[phase][method][key]['std_ale'] for key in clusters[:-1]])
-        if method == 'monoloco_pp':
-            y_max = 5
-            color = 'deepskyblue'
-            epis = np.array([dic_stats[phase][method][key]['std_epi'] for key in clusters[:-1]])
-            plt.plot(xxs, epis, marker='o', color='coral', label="Combined uncertainty (\u03C3)")
-        else:
-            y_max = 3.5
-            color = 'b'
-            plt.plot(xx, get_pixel_error(xx), linewidth=2.5, color='k', label='Pixel error')
-        plt.plot(xxs, bbs, marker='s', color=color, label="Aleatoric uncertainty (b)", linewidth=4, markersize=8)
-        xx = np.linspace(x_min, x_max, 100)
-        plt.plot(xx, get_task_error(xx), '--', label="Task error (monocular bound)", color='lightgreen', linewidth=4)
+    plt.figure(2, figsize=FIGSIZE)
+    xxs = get_distances(clusters)
+    bbs = np.array([dic_stats[phase][net][key]['std_ale'] for key in clusters[:-1]])
+    xx = np.linspace(x_min, x_max, 100)
+    if net == 'monoloco_pp':
+        y_max = 2.7
+        color = 'deepskyblue'
+        epis = np.array([dic_stats[phase][net][key]['std_epi'] for key in clusters[:-1]])
+        plt.plot(xxs, epis, marker='o', color='coral', linewidth=4, markersize=8, label="Combined uncertainty (\u03C3)")
+    else:
+        y_max = 3.5
+        color = 'b'
+        plt.plot(xx, get_pixel_error(xx), linewidth=2.5, color='k', label='Pixel error')
+    plt.plot(xxs, bbs, marker='s', color=color, label="Aleatoric uncertainty (b)", linewidth=4, markersize=8)
+    plt.plot(xx, get_task_error(xx), '--', label="Task error (monocular bound)", color='lightgreen', linewidth=4)

    plt.xlabel("Ground-truth distance [m]", fontsize=FONTSIZE)
    plt.ylabel("Uncertainty [m]", fontsize=FONTSIZE)
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.grid(linewidth=GRID_WIDTH)
    plt.legend(prop={'size': FONTSIZE})
    plt.xticks(fontsize=FONTSIZE)
    plt.yticks(fontsize=FONTSIZE)
    if save:
        plt.tight_layout()
-        path_fig = os.path.join(dir_out, 'spread_' + method + '.png')
+        path_fig = os.path.join(dir_fig, 'spread_' + net + '.png')
        plt.savefig(path_fig, dpi=DPI)
        print("Figure of confidence intervals saved in {}".format(path_fig))
    if show:
        plt.show()
    plt.close('all')


-def show_task_error(show, save, dir_out='data/figures'):
+def show_task_error(dir_fig, show, save):
    """Task error figure"""
    plt.figure(3, figsize=FIGSIZE)
    xx = np.linspace(0.1, 50, 100)
@@ -147,7 +148,7 @@ def show_task_error(show, save, dir_out='data/figures'):
    plt.xticks(fontsize=FONTSIZE)
    plt.yticks(fontsize=FONTSIZE)
    if save:
-        path_fig = os.path.join(dir_out, 'task_error.png')
+        path_fig = os.path.join(dir_fig, 'task_error.png')
        plt.savefig(path_fig, dpi=DPI)
        print("Figure of task error saved in {}".format(path_fig))
    if show:
@@ -181,7 +182,7 @@ def show_method(save, dir_out='data/figures'):
    plt.close('all')


-def show_box_plot(dic_errors, clusters, dir_out='data/figures', show=False, save=False):
+def show_box_plot(dic_errors, clusters, dir_fig, show=False, save=False):
    import pandas as pd
    excl_clusters = ['all', 'easy', 'moderate', 'hard']
    clusters = [int(clst) for clst in clusters if clst not in excl_clusters]
@@ -205,7 +206,7 @@ def show_box_plot(dic_errors, clusters, dir_out='data/figures', show=False, save
    plt.ylim(y_min, y_max)

    if save:
-        path_fig = os.path.join(dir_out, 'box_plot_' + name + '.png')
+        path_fig = os.path.join(dir_fig, 'box_plot_' + name + '.png')
        plt.tight_layout()
        plt.savefig(path_fig, dpi=DPI)
        print("Figure of box plot saved in {}".format(path_fig))
@@ -300,8 +301,8 @@ def get_percentile(dist_gmm):
    # mad_d = np.mean(np.abs(dist_d - mu_d))


-def printing_styles(stereo):
-    if stereo:
+def printing_styles(net):
+    if net == 'monstereo':
        style = {"labels": ['3DOP', 'PSF', 'MonoLoco', 'MonoPSR', 'Pseudo-Lidar', 'Our MonStereo'],
                 "methods": ['3dop', 'psf', 'monoloco', 'monopsr', 'pseudo-lidar', 'monstereo'],
                 "mks": ['s', 'p', 'o', 'v', '*', '^'],
@@ -309,11 +310,12 @@ def printing_styles(stereo):
                 "colors": ['gold', 'skyblue', 'darkgreen', 'pink', 'darkorange', 'b'],
                 "lstyles": ['solid', 'solid', 'dashed', 'dashed', 'solid', 'solid']}
    else:
-        style = {"labels": ['Mono3D', 'Geometric Baseline', 'MonoPSR', '3DOP (stereo)', 'MonoLoco', 'Monoloco++'],
-                 "methods": ['m3d', 'geometric', 'monopsr', '3dop', 'monoloco', 'monoloco_pp'],
+        style = {"labels": ['Geometric Baseline', 'MonoPSR', 'MonoDIS', '3DOP (stereo)',
+                            'MonoLoco', 'Monoloco++'],
+                 "methods": ['geometric', 'monopsr', 'monodis', '3dop', 'monoloco', 'monoloco_pp'],
                 "mks": ['*', '^', 'p', '.', 's', 'o', 'o'],
                 "mksizes": [6, 6, 6, 6, 6, 6], "lws": [1.5, 1.5, 1.5, 1.5, 1.5, 2.2],
-                 "colors": ['r', 'purple', 'olive', 'darkorange', 'b', 'darkblue'],
+                 "colors": ['purple', 'olive', 'r', 'darkorange', 'b', 'darkblue'],
                 "lstyles": ['solid', 'solid', 'solid', 'dashdot', 'solid', 'solid', ]}

    return style
@@ -1,3 +1,6 @@
+
+# File adapted from https://github.com/vita-epfl/openpifpaf
+
from contextlib import contextmanager

import numpy as np
@@ -39,21 +42,20 @@ def canvas(fig_file=None, show=True, **kwargs):
@contextmanager
def image_canvas(image, fig_file=None, show=True, dpi_factor=1.0, fig_width=10.0, **kwargs):
    if 'figsize' not in kwargs:
-        kwargs['figsize'] = (fig_width, fig_width * image.shape[0] / image.shape[1])
+        kwargs['figsize'] = (fig_width, fig_width * image.size[1] / image.size[0])

    fig = plt.figure(**kwargs)
    ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
    ax.set_axis_off()
-    ax.set_xlim(0, image.shape[1])
-    ax.set_ylim(image.shape[0], 0)
+    ax.set_xlim(0, image.size[0])
+    ax.set_ylim(image.size[1], 0)
    fig.add_axes(ax)
    image_2 = ndimage.gaussian_filter(image, sigma=2.5)
    ax.imshow(image_2, alpha=0.4)

    yield ax

    if fig_file:
-        fig.savefig(fig_file, dpi=image.shape[1] / kwargs['figsize'][0] * dpi_factor)
+        fig.savefig(fig_file, dpi=image.size[0] / kwargs['figsize'][0] * dpi_factor)
        print('keypoints image saved')
    if show:
        plt.show()
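The switch from `image.shape` to `image.size` suggests the canvas now receives a PIL image rather than a NumPy array; the two expose dimensions in opposite order, which is why the indices are swapped. A minimal illustration, assuming Pillow and NumPy are installed:

```python
import numpy as np
from PIL import Image

im = Image.new('RGB', (640, 480))    # PIL: size is (width, height)
arr = np.asarray(im)                 # NumPy: shape is (height, width, channels)
print(im.size)                       # (640, 480)
print(arr.shape)                     # (480, 640, 3)
```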
@@ -28,7 +28,7 @@ def image_attributes(dpi, output_types):
        fontsize_num=round(22 * c),
        fontsize_ax=round(16 * c),
        linewidth=round(8 * c),
-        markersize=round(16 * c),
+        markersize=round(13 * c),
        y_box_margin=round(24 * math.sqrt(c)),
        stereo=dict(color='deepskyblue',
                    numcolor='darkorange',
@@ -58,7 +58,7 @@ class Printer:
        self.output_path = output_path
        self.kk = kk
        self.output_types = args.output_types
-        self.z_max = args.z_max  # To include ellipses in the image
+        self.z_max = args.z_max  # set max distance to show instances
        self.show_all = args.show_all
        self.show = args.show_all
        self.save = not args.no_save
@@ -74,26 +74,41 @@ class Printer:
        self.xx_gt = [xx[0] for xx in dic_ann['xyz_real']]
        self.xx_pred = [xx[0] for xx in dic_ann['xyz_pred']]

+        # Set maximum distance
+        self.dd_pred = dic_ann['dds_pred']
+        self.dd_real = dic_ann['dds_real']
+        self.z_max = int(min(self.z_max, 4 + max(max(self.dd_pred), max(self.dd_real, default=0))))
+
        # Do not print instances outside z_max
        self.zz_gt = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0
                      for idx, xx in enumerate(dic_ann['xyz_real'])]
        self.zz_pred = [xx[2] if xx[2] < self.z_max - self.stds_epi[idx] else 0
                        for idx, xx in enumerate(dic_ann['xyz_pred'])]
-        self.dd_pred = dic_ann['dds_pred']
-        self.dd_real = dic_ann['dds_real']
        self.uv_heads = dic_ann['uv_heads']
        self.uv_shoulders = dic_ann['uv_shoulders']
        self.boxes = dic_ann['boxes']
        self.boxes_gt = dic_ann['boxes_gt']
        self.uv_camera = (int(self.im.size[0] / 2), self.im.size[1])
-        if dic_ann['aux']:
-            self.auxs = dic_ann['aux'] if dic_ann['aux'] else None
+        self.auxs = dic_ann['aux']
+        if len(self.auxs) == 0:
+            self.modes = ['mono'] * len(self.dd_pred)
+        else:
+            self.modes = []
+            for aux in self.auxs:
+                if aux <= 0.3:
+                    self.modes.append('mono')
+                else:
+                    self.modes.append('stereo')

-    def factory_axes(self):
+    def factory_axes(self, dic_out):
        """Create axes for figures: front bird multi"""
        axes = []
        figures = []

+        # Process the annotation dictionary of monoloco
+        self._process_results(dic_out)
+
        # Initialize multi figure, resizing it for aesthetic proportion
        if 'multi' in self.output_types:
            assert 'bird' and 'front' not in self.output_types, \
@@ -150,10 +165,7 @@ class Printer:
            axes.append(ax1)
        return figures, axes

-    def draw(self, figures, axes, dic_out, image):
+    def draw(self, figures, axes, image):

-        # Process the annotation dictionary of monoloco
-        self._process_results(dic_out)
-
        # whether to include instances that don't match the ground-truth
        iterator = range(len(self.zz_pred)) if self.show_all else range(len(self.zz_gt))
@@ -163,9 +175,9 @@ class Printer:

        # Draw the front figure
        number = dict(flag=False, num=97)
-        if 'multi' in self.output_types:
+        if any(xx in self.output_types for xx in ['front', 'multi']):
            number['flag'] = True  # add numbers
            self.mpl_im0.set_data(image)
        for idx in iterator:
            if any(xx in self.output_types for xx in ['front', 'multi']) and self.zz_pred[idx] > 0:
                self._draw_front(axes[0],
@@ -199,8 +211,6 @@ class Printer:

    def _draw_front(self, ax, z, idx, number):

-        mode = 'stereo' if self.auxs[idx] > 0.3 else 'mono'
-
        # Bbox
        w = min(self.width-2, self.boxes[idx][2] - self.boxes[idx][0])
        h = min(self.height-2, (self.boxes[idx][3] - self.boxes[idx][1]) * self.y_scale)
@@ -211,12 +221,12 @@ class Printer:
            width=w,
            height=h,
            fill=False,
-            color=self.attr[mode]['color'],
-            linewidth=self.attr[mode]['linewidth'])
+            color=self.attr[self.modes[idx]]['color'],
+            linewidth=self.attr[self.modes[idx]]['linewidth'])
        ax.add_patch(rectangle)
        z_str = str(z).split(sep='.')
        text = z_str[0] + '.' + z_str[1][0]
-        bbox_config = {'facecolor': self.attr[mode]['color'], 'alpha': 0.4, 'linewidth': 0}
+        bbox_config = {'facecolor': self.attr[self.modes[idx]]['color'], 'alpha': 0.4, 'linewidth': 0}

        x_t = x0 - 1.5
        y_t = y1 + self.attr['y_box_margin']
@@ -236,12 +246,12 @@ class Printer:
                y1 + 14,
                chr(number['num']),
                fontsize=self.attr['fontsize_num'],
-                color=self.attr[mode]['numcolor'],
+                color=self.attr[self.modes[idx]]['numcolor'],
                weight='bold')

    def _draw_text_bird(self, axes, idx, num):
        """Plot the number in the bird eye view map"""
-        mode = 'stereo' if self.auxs[idx] > 0.3 else 'mono'
        std = self.stds_epi[idx] if self.stds_epi[idx] > 0 else self.stds_ale[idx]
        theta = math.atan2(self.zz_pred[idx], self.xx_pred[idx])

@@ -250,7 +260,7 @@ class Printer:

        axes[1].text(self.xx_pred[idx] + delta_x + 0.2, self.zz_pred[idx] + delta_z + 0/2, chr(num),
                     fontsize=self.attr['fontsize_bv'],
-                     color=self.attr[mode]['numcolor'])
+                     color=self.attr[self.modes[idx]]['numcolor'])

    def _draw_uncertainty(self, axes, idx):


setup.py
@@ -18,7 +18,8 @@ setup(
        'monstereo.utils'
    ],
    license='GNU AGPLv3',
-    description='MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization',
+    description=' Perceiving Humans: from Monocular 3D Localization to Social Distancing '
+                '/ MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization',
    long_description=open('README.md').read(),
    long_description_content_type='text/markdown',
    author='Lorenzo Bertoni',
@@ -27,9 +28,7 @@ setup(
    zip_safe=False,

    install_requires=[
-        'openpifpaf==0.8.0',
-        'torch==1.1.0',
-        'torchvision==0.3.0'
+        'openpifpaf>=0.11'
    ],
    extras_require={
        'eval': [