diff --git a/README.md b/README.md index 5961db8..68a9e1a 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ gif -This library is based on three research projects for monocular/stereo 3D human localization, orientation and social distancing. +This library is based on three research projects for monocular/stereo 3D human localization (detection), body orientation, and social distancing. > __MonStereo: When Monocular and Stereo Meet at the Tail of 3D Human Localization__
> _[L. Bertoni](https://scholar.google.com/citations?user=f-4YHeMAAAAJ&hl=en), [S. Kreiss](https://www.svenkreiss.com), [T. Mordan](https://people.epfl.ch/taylor.mordan/?lang=en), [A. Alahi](https://scholar.google.com/citations?user=UIhXQ64AAAAJ&hl=en)_, ICRA 2021
__[Article](https://arxiv.org/abs/2008.10913)__                 __[Citation](#Citation)__                 __[Video](#Todo)__ - + --- @@ -128,7 +128,7 @@ To show all the instances estimated by MonoLoco add the argument `show_all` to t It is also possible to run [openpifpaf](https://github.com/vita-epfl/openpifpaf) directly by using `--mode keypoints`. All the other pifpaf arguments are also supported -and can be checked with `python -m monstereo.run predict --help`. +and can be checked with `python -m monoloco.run predict --help`. ![predict](docs/out_002282_pifpaf.jpg) @@ -149,16 +149,17 @@ python3 -m monoloco.run predict --mode stereo \ -o data/output --long_edge 2500 ``` -![Crowded scene](docs/out_000840.jpg) +![Crowded scene](docs/out_000840_multi.jpg) ``` python3 -m monoloco.run predict --glob docs/005523*.png \ --output_types multi \ --model data/models/ms-200710-1511.pkl \ --path_gt \ - -o data/output --long_edge 2500 +-o data/output --long_edge 2500 \ +--instance-threshold 0.05 --seed-threshold 0.05 ``` -![Occluded hard example](docs/out_005523.jpg) +![Occluded hard example](docs/out_005523.png.multi.jpg) ## B) Social Distancing (and Talking activity) To visualize social distancing compliance, simply add the argument `--social-distance` to the predict command. This visualization is not supported with a stereo camera. @@ -180,17 +181,16 @@ python -m monoloco.run predict docs/frame0038.jpg \ - -## C) Orientation and Bounding Box dimensions -MonoLoco++ estimates orientation and box dimensions as well. Results are saved in a json file when using the command +## C) Orientation and Bounding Box dimensions +The network estimates orientation and box dimensions as well. Results are saved in a json file when using the command `--output_types json`. At the moment, the only visualization including orientation is the social distancing one. -
+
## Training We train on the KITTI dataset (MonoLoco/MonoLoco++/MonStereo) or the nuScenes dataset (MonoLoco) specifying the path of the json file containing the input joints. Please download them [here](https://drive.google.com/file/d/1e-wXTO460ip_Je2NdXojxrOrJ-Oirlgh/view?usp=sharing) or follow [preprocessing instructions](#Preprocessing). -Our results for MonoLoco++ are obtained with: +Results for MonoLoco++ are obtained with: ``` python -m monoloco.run train --joints data/arrays/joints-kitti-201202-1743.json --save --monocular @@ -207,7 +207,7 @@ Finally, for a more extensive list of available parameters, run: `python -m monoloco.run train --help` -
+
## Preprocessing Preprocessing and training steps are already fully supported by the code provided, diff --git a/docs/000840.png b/docs/000840.png new file mode 100755 index 0000000..fb2baa3 Binary files /dev/null and b/docs/000840.png differ diff --git a/docs/000840_right.png b/docs/000840_right.png new file mode 100755 index 0000000..cef53d1 Binary files /dev/null and b/docs/000840_right.png differ diff --git a/docs/000840_multi.jpg b/docs/out_000840_multi.jpg similarity index 100% rename from docs/000840_multi.jpg rename to docs/out_000840_multi.jpg diff --git a/docs/out_002282.png.multi.jpg b/docs/out_002282.png.multi.jpg deleted file mode 100644 index 8217b35..0000000 Binary files a/docs/out_002282.png.multi.jpg and /dev/null differ diff --git a/docs/out_005523.jpg b/docs/out_005523.jpg deleted file mode 100644 index 5488f35..0000000 Binary files a/docs/out_005523.jpg and /dev/null differ diff --git a/docs/out_005523.png.multi.jpg b/docs/out_005523.png.multi.jpg new file mode 100644 index 0000000..e59123f Binary files /dev/null and b/docs/out_005523.png.multi.jpg differ diff --git a/docs/truck.jpg b/docs/truck.jpg deleted file mode 100644 index 7d1b3af..0000000 Binary files a/docs/truck.jpg and /dev/null differ diff --git a/monoloco/predict.py b/monoloco/predict.py index c0a8fef..b2de878 100644 --- a/monoloco/predict.py +++ b/monoloco/predict.py @@ -108,6 +108,7 @@ def factory_from_args(args): else: args.batch_size = 1 + # Patch for stereo images with batch_size = 2 if args.batch_size == 2 and not args.long_edge: args.long_edge = 1238 LOG.info("Long-edge set to %i".format(args.long_edge)) @@ -177,7 +178,9 @@ def predict(args): else: file_name = os.path.basename(meta['file_name']) output_path = os.path.join(args.output_directory, 'out_' + file_name) - print('image', batch_i, meta['file_name'], output_path) + + im_name = os.path.basename(meta['file_name']) + print(f'{batch_i} image {im_name} saved as {output_path}') # Only for MonStereo else: @@ -186,7 +189,7 @@ 
def predict(args): # 3D Predictions if args.mode != 'keypoints': im_size = (cpu_image.size[0], cpu_image.size[1]) # Original - kk, dic_gt = factory_for_gt(im_size, focal_length=args.focal, name=file_name, path_gt=args.path_gt) + kk, dic_gt = factory_for_gt(im_size, focal_length=args.focal, name=im_name, path_gt=args.path_gt) # Preprocess pifpaf outputs and run monoloco boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False) diff --git a/monoloco/run.py b/monoloco/run.py index e7a4f9c..566e71b 100644 --- a/monoloco/run.py +++ b/monoloco/run.py @@ -45,6 +45,8 @@ def cli(): predict_parser.add_argument('--font-size', default=0, type=int, help='annotation font size') predict_parser.add_argument('--monocolor-connections', default=False, action='store_true', help='use a single color per instance') + predict_parser.add_argument('--instance-threshold', type=float, default=None, help='threshold for entire instance') + predict_parser.add_argument('--seed-threshold', type=float, default=None, help='threshold for single seed') predict_parser.add_argument('--disable-cuda', action='store_true', help='disable CUDA') predict_parser.add_argument('--focal', help='focal length in mm for a sensor size of 7.2x5.4 mm. Default nuScenes sensor',