diff --git a/README.md b/README.md
index b9ffe8e..3fab5dc 100644
--- a/README.md
+++ b/README.md
@@ -125,7 +125,7 @@ If you provide a ground-truth json file to compare the predictions of the networ
 For an example image, run the following command:
 ```sh
-python -m monoloco.run predict docs/test_002282.png \
+python3 -m monoloco.run predict docs/test_002282.png \
 --path_gt names-kitti-200615-1022.json \
 -o <output directory> \
 --long-edge <dimension of the long side to rescale the image>
@@ -140,7 +140,7 @@ To show all the instances estimated by MonoLoco add the argument `--show_all` to
 It is also possible to run [openpifpaf](https://github.com/vita-epfl/openpifpaf) directly
 by using `--mode keypoints`. All the other pifpaf arguments are also supported
-and can be checked with `python -m monoloco.run predict --help`.
+and can be checked with `python3 -m monoloco.run predict --help`.

 ![predict](docs/out_test_002282_pifpaf.jpg)

@@ -178,7 +178,7 @@ To visualize social distancing compliance, simply add the argument `social_dista
 Threshold distance and radii (for F-formations) can be set using `--threshold-dist` and `--radii`, respectively.

 For more info, run:
-`python -m monoloco.run predict --help`
+`python3 -m monoloco.run predict --help`

 **Examples**
 An example from the Collective Activity Dataset is provided below.
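As a quick illustration of the two flags documented in the hunk above, a hypothetical invocation; the flag values are illustrative only and not part of this diff:

```sh
python3 -m monoloco.run predict docs/test_frame0032.jpg \
--activities social_distance --output_types front bird \
--threshold-dist 2.5 --radii 0.3 0.5 1
```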
@@ -188,11 +188,11 @@ An example from the Collective Activity Dataset is provided below.
 To visualize social distancing, run the command below:
 ```sh
-pip install scipy
+pip3 install scipy
 ```

 ```sh
-python -m monoloco.run predict docs/test_frame0032.jpg \
+python3 -m monoloco.run predict docs/test_frame0032.jpg \
 --activities social_distance --output_types front bird
 ```

@@ -203,13 +203,14 @@ To detect raised hand, you can add the argument `--activities raise_hand` to the
 For example, the image below is obtained with:
 ```sh
-python -m monoloco.run predict --activities raise_hand social_distance output_types front
+python3 -m monoloco.run predict docs/raising_hand.jpg \
+--activities raise_hand social_distance --output_types front
 ```

 For more info, run:
-`python -m monoloco.run predict --help`
+`python3 -m monoloco.run predict --help`

 ## D) Orientation and Bounding Box dimensions
 The network estimates orientation and box dimensions as well. Results are saved in a json file when using the command
@@ -218,11 +219,14 @@
 ## E) Webcam
 You can use the webcam as input with the `--webcam` argument.
 By default, `--z_max` is set to 10 and `--long-edge` to 144 when using the webcam.
 If multiple webcams are plugged in, you can choose between them with `--camera`; for instance, to use the second camera add `--camera 1`.
-
+You also need to install `opencv-python` to use this feature:
+```sh
+pip3 install opencv-python
+```
 Example command:
 ```sh
-python -m monoloco.run predict --webcam \
+python3 -m monoloco.run predict --webcam \
 --activities raise_hand social_distance
 ```

@@ -231,21 +235,22 @@ We train on the KITTI dataset (MonoLoco/Monoloco++/MonStereo) or the nuScenes da
 Results for [MonoLoco++](###Tables) are obtained with:

-```
-python -m monoloco.run train --joints data/arrays/joints-kitti-mono-210422-1600.json
+```sh
+python3 -m monoloco.run train --joints data/arrays/joints-kitti-mono-210422-1600.json
 ```

 While for the [MonStereo](###Tables) results run:

 ```sh
-python -m monoloco.run train --joints data/arrays/joints-kitti-stereo-210422-1601.json --lr 0.003 --mode stereo
+python3 -m monoloco.run train --joints data/arrays/joints-kitti-stereo-210422-1601.json \
+--lr 0.003 --mode stereo
 ```

 If you are interested in the original results of the MonoLoco ICCV article (now improved with MonoLoco++), please refer to the tag v0.4.9 in this repository.

 Finally, for a more extensive list of available parameters, run:
-`python -m monstereo.run train --help`
+`python3 -m monstereo.run train --help`
@@ -284,7 +289,7 @@ Download kitti images (from left and right cameras), ground-truth files (labels)
 The network takes 2D keypoint annotations as input. To create them, run PifPaf over the saved images:
 ```sh
-python -m openpifpaf.predict \
+python3 -m openpifpaf.predict \
 --glob "data/kitti/images/*.png" \
 --json-output <output directory> \
 --checkpoint=shufflenetv2k30 \
@@ -306,12 +311,12 @@ Once this step is complete, the below commands transform all the annotations int
 For MonoLoco++:
 ```sh
-python -m monoloco.run prep --dir_ann <annotation directory>
+python3 -m monoloco.run prep --dir_ann <annotation directory>
 ```

 For MonStereo:
 ```sh
-python -m monoloco.run prep --mode stereo --dir_ann <annotation directory>
+python3 -m monoloco.run prep --mode stereo --dir_ann <annotation directory>
 ```

 ## Collective Activity Dataset
@@ -341,7 +346,7 @@ which for example change the name of all the jpg images in that folder adding th
 Pifpaf annotations should also be saved in a single folder and can be created with:
 ```sh
-python -m openpifpaf.predict \
+python3 -m openpifpaf.predict \
 --glob "data/collective_activity/images/*.jpg" \
 --checkpoint=shufflenetv2k30 \
 --instance-threshold=0.05 --seed-threshold 0.05 \
 --force-complete-pose \
@@ -381,7 +386,7 @@ To include also geometric baselines and MonoLoco, download a monoloco model, sav
 The evaluation file will run the model over all the annotations and compare the results with the KITTI ground-truth and the downloaded baselines. For this, run:
 ```sh
-python -m monoloco.run eval \
+python3 -m monoloco.run eval \
 --dir_ann <annotation directory> \
 --model data/outputs/monoloco_pp-210422-1601.pkl \
 --generate \
@@ -411,7 +416,7 @@ Evaluation on this dataset is done with models trained on either KITTI or nuScen
 For optimal performance, we suggest the model trained on the nuScenes teaser.
 ```sh
-python -m monstereo.run eval \
+python3 -m monstereo.run eval \
 --activity \
 --dataset collective \
 --model <model path> \
diff --git a/monoloco/predict.py b/monoloco/predict.py
index 95c42b0..ac481cf 100644
--- a/monoloco/predict.py
+++ b/monoloco/predict.py
@@ -18,7 +18,6 @@ import torch
 import PIL
 import openpifpaf
 import openpifpaf.datasets as datasets
-from openpifpaf.predict import processor_factory, preprocess_factory
 from openpifpaf import decoder, network, visualizer, show, logger
 try:
     import gdown
@@ -162,12 +161,11 @@ def predict(args):
               n_dropout=args.n_dropout, p_dropout=args.dropout)

-    # data
-    processor, pifpaf_model = processor_factory(args)
-    preprocess = preprocess_factory(args)
+    # for openpifpaf predictions
+    predictor = openpifpaf.Predictor(checkpoint=args.checkpoint)

     # data
-    data = datasets.ImageList(args.images, preprocess=preprocess)
+    data = datasets.ImageList(args.images, preprocess=predictor.preprocess)
     if args.mode == 'stereo':
         assert len(
             data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
@@ -176,22 +174,19 @@
         data, batch_size=args.batch_size, shuffle=False, pin_memory=False,
         collate_fn=datasets.collate_images_anns_meta)

-    for batch_i, (image_tensors_batch, _, meta_batch) in enumerate(data_loader):
-        pred_batch = processor.batch(
-            pifpaf_model, image_tensors_batch, device=args.device)
+    for batch_i, (_, _, meta_batch) in enumerate(data_loader):

         # unbatch (only for MonStereo)
-        for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
+        for idx, (preds, _, meta) in enumerate(predictor.dataset(data)):
             LOG.info('batch %d: %s', batch_i, meta['file_name'])
-            pred = [ann.inverse_transform(meta) for ann in pred]

             # Load image and collect pifpaf results
             if idx == 0:
                 with open(meta_batch[0]['file_name'], 'rb') as f:
                     cpu_image = PIL.Image.open(f).convert('RGB')
                 pifpaf_outs = {
-                    'pred': pred,
-                    'left': [ann.json_data() for ann in pred],
+                    'pred': preds,
+                    'left': [ann.json_data() for ann in preds],
                     'image': cpu_image}

             # Set output image name
@@ -208,7 +203,7 @@

             # Only for MonStereo
             else:
-                pifpaf_outs['right'] = [ann.json_data() for ann in pred]
+                pifpaf_outs['right'] = [ann.json_data() for ann in preds]

     # 3D Predictions
     if args.mode != 'keypoints':
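The predict.py hunks above swap the removed `processor_factory`/`preprocess_factory` helpers for openpifpaf's `Predictor` class. A minimal standalone sketch of the new flow, assuming openpifpaf >= 0.12.10; the checkpoint name and image path are illustrative:

```python
import openpifpaf
import openpifpaf.datasets as datasets

# Predictor bundles checkpoint loading, preprocessing and decoding in one object
predictor = openpifpaf.Predictor(checkpoint='shufflenetv2k30')

# reuse the predictor's own preprocessing when building the dataset,
# exactly as the refactored predict() does
data = datasets.ImageList(['docs/test_002282.png'], preprocess=predictor.preprocess)

# predictor.dataset() yields (predictions, ground-truth anns, meta) per image
for preds, _, meta in predictor.dataset(data):
    print(meta['file_name'], [ann.json_data() for ann in preds])
```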
diff --git a/monoloco/visuals/webcam.py b/monoloco/visuals/webcam.py
index 5dc3e84..b098cec 100644
--- a/monoloco/visuals/webcam.py
+++ b/monoloco/visuals/webcam.py
@@ -17,9 +17,9 @@ try:
 except ImportError:
     cv2 = None

+import openpifpaf
 from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
-from openpifpaf.predict import processor_factory, preprocess_factory

 from ..visuals import Printer
 from ..network import Loco
@@ -73,6 +73,7 @@ def factory_from_args(args):

 def webcam(args):
     assert args.mode in 'mono'
+    assert cv2

     args, dic_models = factory_from_args(args)

@@ -80,8 +81,8 @@
     net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
               n_dropout=args.n_dropout, p_dropout=args.dropout)

-    processor, pifpaf_model = processor_factory(args)
-    preprocess = preprocess_factory(args)
+    # for openpifpaf predictions
+    predictor = openpifpaf.Predictor(checkpoint=args.checkpoint)

     # Start recording
     cam = cv2.VideoCapture(args.camera)
@@ -93,28 +94,25 @@
         scale = (args.long_edge)/frame.shape[0]
         image = cv2.resize(frame, None, fx=scale, fy=scale)
         height, width, _ = image.shape
-        print('resized image size: {}'.format(image.shape))
+        LOG.debug('resized image size: {}'.format(image.shape))
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(image)

         data = datasets.PilImageList(
-            [pil_image], preprocess=preprocess)
+            [pil_image], preprocess=predictor.preprocess)

         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False, pin_memory=False,
             collate_fn=datasets.collate_images_anns_meta)

-        for (image_tensors_batch, _, meta_batch) in data_loader:
-            pred_batch = processor.batch(
-                pifpaf_model, image_tensors_batch, device=args.device)
+        for (_, _, _) in data_loader:

-            for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
-                pred = [ann.inverse_transform(meta) for ann in pred]
+            for idx, (preds, _, _) in enumerate(predictor.dataset(data)):
                 if idx == 0:
                     pifpaf_outs = {
-                        'pred': pred,
-                        'left': [ann.json_data() for ann in pred],
+                        'pred': preds,
+                        'left': [ann.json_data() for ann in preds],
                         'image': image}

         if not ret:
@@ -122,7 +120,7 @@
             key = cv2.waitKey(1)
             if key % 256 == 27:  # ESC pressed
-                print("Escape hit, closing...")
+                LOG.info("Escape hit, closing...")
                 break

         kk, dic_gt = factory_for_gt(pil_image.size, focal_length=args.focal)
@@ -140,11 +138,11 @@
             visualizer_mono = Visualizer(kk, args)(pil_image)  # create it with the first image
             visualizer_mono.send(None)

-        print(dic_out)
+        LOG.debug(dic_out)
         visualizer_mono.send((pil_image, dic_out, pifpaf_outs))

         end = time.time()
-        print("run-time: {:.2f} ms".format((end-start)*1000))
+        LOG.info("run-time: {:.2f} ms".format((end-start)*1000))

     cam.release()
diff --git a/setup.py b/setup.py
index 33ae6c7..8e1d283 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
     zip_safe=False,

     install_requires=[
-        'openpifpaf>=v0.12.1',
+        'openpifpaf>=v0.12.10',
         'matplotlib',
     ],
     extras_require={
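The webcam refactor follows the same `Predictor` pattern, one frame at a time. A minimal sketch of the capture-to-prediction path under the same assumptions (opencv-python installed, openpifpaf >= 0.12.10, illustrative checkpoint name):

```python
import cv2
import PIL.Image
import openpifpaf
import openpifpaf.datasets as datasets

predictor = openpifpaf.Predictor(checkpoint='shufflenetv2k30')

cam = cv2.VideoCapture(0)  # 0 selects the first webcam, like --camera 0
ret, frame = cam.read()
assert ret, 'no frame captured'

# OpenCV captures BGR arrays; openpifpaf expects an RGB PIL image
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
pil_image = PIL.Image.fromarray(rgb)

data = datasets.PilImageList([pil_image], preprocess=predictor.preprocess)
for preds, _, _ in predictor.dataset(data):
    print([ann.json_data() for ann in preds])

cam.release()
```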