Update to openpifpaf v0.12.10 (#63)
* Updated for openpifpaf v0.12.10
* Linting and better logging
* Clean up
* Better comment
* Fix
parent ddeb860f81
commit e71a2e4905
README.md: 43 lines changed
@@ -125,7 +125,7 @@ If you provide a ground-truth json file to compare the predictions of the networ
 For an example image, run the following command:
 
 ```sh
-python -m monoloco.run predict docs/test_002282.png \
+python3 -m monoloco.run predict docs/test_002282.png \
 --path_gt names-kitti-200615-1022.json \
 -o <output directory> \
 --long-edge <rescale the image by providing dimension of long side>
@@ -140,7 +140,7 @@ To show all the instances estimated by MonoLoco add the argument `--show_all` to
 
 It is also possible to run [openpifpaf](https://github.com/vita-epfl/openpifpaf) directly
 by using `--mode keypoints`. All the other pifpaf arguments are also supported
-and can be checked with `python -m monoloco.run predict --help`.
+and can be checked with `python3 -m monoloco.run predict --help`.
 

 
@@ -178,7 +178,7 @@ To visualize social distancing compliance, simply add the argument `social_dista
 Threshold distance and radii (for F-formations) can be set using `--threshold-dist` and `--radii`, respectively.
 
 For more info, run:
-`python -m monoloco.run predict --help`
+`python3 -m monoloco.run predict --help`
 
 **Examples** <br>
 An example from the Collective Activity Dataset is provided below.
@@ -188,11 +188,11 @@ An example from the Collective Activity Dataset is provided below.
 To visualize social distancing, run the command below:
 
 ```sh
-pip install scipy
+pip3 install scipy
 ```
 
 ```sh
-python -m monoloco.run predict docs/test_frame0032.jpg \
+python3 -m monoloco.run predict docs/test_frame0032.jpg \
 --activities social_distance --output_types front bird
 ```
 
@@ -203,13 +203,14 @@ To detect raised hand, you can add the argument `--activities raise_hand` to the
 
 For example, the image below is obtained with:
 ```sh
-python -m monoloco.run predict --activities raise_hand social_distance output_types front
+python3 -m monoloco.run predict docs/raising_hand.jpg \
+--activities raise_hand social_distance --output_types front
 ```
 
 <img src="docs/out_raising_hand.jpg.front.jpg" width="500"/>
 
 For more info, run:
-`python -m monoloco.run predict --help`
+`python3 -m monoloco.run predict --help`
 
 ## D) Orientation and Bounding Box dimensions
 The network estimates orientation and box dimensions as well. Results are saved in a json file when using the command
@@ -218,11 +219,14 @@ The network estimates orientation and box dimensions as well. Results are saved
 
 ## E) Webcam
 You can use the webcam as input by using the `--webcam` argument. By default the `--z_max` is set to 10 while using the webcam and the `--long-edge` is set to 144. If multiple webcams are plugged in you can choose between them using `--camera`; for instance, to use the second camera add `--camera 1`.
+You also need to install `opencv-python` to use this feature:
+```sh
+pip3 install opencv-python
+```
 Example command:
 
 ```sh
-python -m monoloco.run predict --webcam \
+python3 -m monoloco.run predict --webcam \
 --activities raise_hand social_distance
 ```
 
@@ -231,21 +235,22 @@ We train on the KITTI dataset (MonoLoco/Monoloco++/MonStereo) or the nuScenes da
 
 Results for [MonoLoco++](###Tables) are obtained with:
 
-```
-python -m monoloco.run train --joints data/arrays/joints-kitti-mono-210422-1600.json
+```sh
+python3 -m monoloco.run train --joints data/arrays/joints-kitti-mono-210422-1600.json
 ```
 
 While for the [MonStereo](###Tables) results run:
 
 ```sh
-python -m monoloco.run train --joints data/arrays/joints-kitti-stereo-210422-1601.json --lr 0.003 --mode stereo
+python3 -m monoloco.run train --joints data/arrays/joints-kitti-stereo-210422-1601.json \
+--lr 0.003 --mode stereo
 ```
 
 If you are interested in the original results of the MonoLoco ICCV article (now improved with MonoLoco++), please refer to the tag v0.4.9 in this repository.
 
 Finally, for a more extensive list of available parameters, run:
 
-`python -m monstereo.run train --help`
+`python3 -m monstereo.run train --help`
 
 <br />
 
@@ -284,7 +289,7 @@ Download kitti images (from left and right cameras), ground-truth files (labels)
 The network takes as inputs 2D keypoints annotations. To create them run PifPaf over the saved images:
 
 ```sh
-python -m openpifpaf.predict \
+python3 -m openpifpaf.predict \
 --glob "data/kitti/images/*.png" \
 --json-output <directory to contain predictions> \
 --checkpoint=shufflenetv2k30 \
@@ -306,12 +311,12 @@ Once this step is complete, the below commands transform all the annotations int
 
 For MonoLoco++:
 ```sh
-python -m monoloco.run prep --dir_ann <directory that contains annotations>
+python3 -m monoloco.run prep --dir_ann <directory that contains annotations>
 ```
 
 For MonStereo:
 ```sh
-python -m monoloco.run prep --mode stereo --dir_ann <directory that contains left annotations>
+python3 -m monoloco.run prep --mode stereo --dir_ann <directory that contains left annotations>
 ```
 
 ## Collective Activity Dataset
@@ -341,7 +346,7 @@ which for example change the name of all the jpg images in that folder adding th
 Pifpaf annotations should also be saved in a single folder and can be created with:
 
 ```sh
-python -m openpifpaf.predict \
+python3 -m openpifpaf.predict \
 --glob "data/collective_activity/images/*.jpg" \
 --checkpoint=shufflenetv2k30 \
 --instance-threshold=0.05 --seed-threshold 0.05 \--force-complete-pose \
@@ -381,7 +386,7 @@ To include also geometric baselines and MonoLoco, download a monoloco model, sav
 The evaluation file will run the model over all the annotations and compare the results with KITTI ground-truth and the downloaded baselines. For this run:
 
 ```sh
-python -m monoloco.run eval \
+python3 -m monoloco.run eval \
 --dir_ann <annotation directory> \
 --model data/outputs/monoloco_pp-210422-1601.pkl \
 --generate \
@@ -411,7 +416,7 @@ Evaluation on this dataset is done with models trained on either KITTI or nuScen
 For optimal performances, we suggest the model trained on nuScenes teaser.
 
 ```sh
-python -m monstereo.run eval \
+python3 -m monstereo.run eval \
 --activity \
 --dataset collective \
 --model <path to the model> \
@@ -18,7 +18,6 @@ import torch
 import PIL
 import openpifpaf
 import openpifpaf.datasets as datasets
-from openpifpaf.predict import processor_factory, preprocess_factory
 from openpifpaf import decoder, network, visualizer, show, logger
 try:
     import gdown
@@ -162,12 +161,11 @@ def predict(args):
         n_dropout=args.n_dropout,
         p_dropout=args.dropout)
 
-    # data
-    processor, pifpaf_model = processor_factory(args)
-    preprocess = preprocess_factory(args)
+    # for openpifpaf predictions
+    predictor = openpifpaf.Predictor(checkpoint=args.checkpoint)
 
     # data
-    data = datasets.ImageList(args.images, preprocess=preprocess)
+    data = datasets.ImageList(args.images, preprocess=predictor.preprocess)
     if args.mode == 'stereo':
         assert len(
             data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
@@ -176,22 +174,19 @@ def predict(args):
         data, batch_size=args.batch_size, shuffle=False,
         pin_memory=False, collate_fn=datasets.collate_images_anns_meta)
 
-    for batch_i, (image_tensors_batch, _, meta_batch) in enumerate(data_loader):
-        pred_batch = processor.batch(
-            pifpaf_model, image_tensors_batch, device=args.device)
+    for batch_i, (_, _, meta_batch) in enumerate(data_loader):
 
         # unbatch (only for MonStereo)
-        for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
+        for idx, (preds, _, meta) in enumerate(predictor.dataset(data)):
             LOG.info('batch %d: %s', batch_i, meta['file_name'])
-            pred = [ann.inverse_transform(meta) for ann in pred]
 
             # Load image and collect pifpaf results
             if idx == 0:
                 with open(meta_batch[0]['file_name'], 'rb') as f:
                     cpu_image = PIL.Image.open(f).convert('RGB')
                 pifpaf_outs = {
-                    'pred': pred,
-                    'left': [ann.json_data() for ann in pred],
+                    'pred': preds,
+                    'left': [ann.json_data() for ann in preds],
                     'image': cpu_image}
 
             # Set output image name
@@ -208,7 +203,7 @@ def predict(args):
 
             # Only for MonStereo
             else:
-                pifpaf_outs['right'] = [ann.json_data() for ann in pred]
+                pifpaf_outs['right'] = [ann.json_data() for ann in preds]
 
         # 3D Predictions
         if args.mode != 'keypoints':
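The hunks above replace openpifpaf's removed `processor_factory`/`preprocess_factory` helpers with the `openpifpaf.Predictor` class. A minimal standalone sketch of that pattern, restricted to the calls that appear in this diff (the checkpoint name and image path are placeholder values):

```python
import openpifpaf
import openpifpaf.datasets as datasets

# A single Predictor bundles checkpoint loading, preprocessing and decoding.
predictor = openpifpaf.Predictor(checkpoint='shufflenetv2k30')  # placeholder checkpoint

# Reuse the predictor's own preprocessing when building the image list.
data = datasets.ImageList(['docs/test_002282.png'], preprocess=predictor.preprocess)

# predictor.dataset() yields (predictions, ground truth, meta) per image.
for predictions, _, meta in predictor.dataset(data):
    keypoints = [ann.json_data() for ann in predictions]  # pifpaf keypoints as dicts
    print(meta['file_name'], len(keypoints), 'poses')
```

Dropping the explicit `ann.inverse_transform(meta)` step in the same hunk suggests the new `Predictor` already maps annotations back to original image coordinates.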
@@ -17,9 +17,9 @@ try:
 except ImportError:
     cv2 = None
 
+import openpifpaf
 from openpifpaf import decoder, network, visualizer, show, logger
 import openpifpaf.datasets as datasets
-from openpifpaf.predict import processor_factory, preprocess_factory
 
 from ..visuals import Printer
 from ..network import Loco
@@ -73,6 +73,7 @@ def factory_from_args(args):
 def webcam(args):
 
     assert args.mode in 'mono'
+    assert cv2
 
     args, dic_models = factory_from_args(args)
 
@@ -80,8 +81,8 @@ def webcam(args):
     net = Loco(model=dic_models[args.mode], mode=args.mode, device=args.device,
                n_dropout=args.n_dropout, p_dropout=args.dropout)
 
-    processor, pifpaf_model = processor_factory(args)
-    preprocess = preprocess_factory(args)
+    # for openpifpaf predictions
+    predictor = openpifpaf.Predictor(checkpoint=args.checkpoint)
 
     # Start recording
     cam = cv2.VideoCapture(args.camera)
@@ -93,28 +94,25 @@ def webcam(args):
         scale = (args.long_edge)/frame.shape[0]
         image = cv2.resize(frame, None, fx=scale, fy=scale)
         height, width, _ = image.shape
-        print('resized image size: {}'.format(image.shape))
+        LOG.debug('resized image size: {}'.format(image.shape))
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(image)
 
         data = datasets.PilImageList(
-            [pil_image], preprocess=preprocess)
+            [pil_image], preprocess=predictor.preprocess)
 
         data_loader = torch.utils.data.DataLoader(
             data, batch_size=1, shuffle=False,
             pin_memory=False, collate_fn=datasets.collate_images_anns_meta)
 
-        for (image_tensors_batch, _, meta_batch) in data_loader:
-            pred_batch = processor.batch(
-                pifpaf_model, image_tensors_batch, device=args.device)
+        for (_, _, _) in data_loader:
 
-            for idx, (pred, meta) in enumerate(zip(pred_batch, meta_batch)):
-                pred = [ann.inverse_transform(meta) for ann in pred]
+            for idx, (preds, _, _) in enumerate(predictor.dataset(data)):
 
                 if idx == 0:
                     pifpaf_outs = {
-                        'pred': pred,
-                        'left': [ann.json_data() for ann in pred],
+                        'pred': preds,
+                        'left': [ann.json_data() for ann in preds],
                         'image': image}
 
         if not ret:
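The webcam loop applies the same migration frame by frame. A rough sketch of that per-frame flow, again limited to calls visible in the hunk (camera index and checkpoint are placeholders):

```python
import cv2
import openpifpaf
import openpifpaf.datasets as datasets
from PIL import Image

predictor = openpifpaf.Predictor(checkpoint='shufflenetv2k30')  # placeholder checkpoint
cam = cv2.VideoCapture(0)  # placeholder camera index

ret, frame = cam.read()
if ret:
    # OpenCV frames are BGR; pifpaf expects RGB PIL images.
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    data = datasets.PilImageList([pil_image], preprocess=predictor.preprocess)
    for predictions, _, _ in predictor.dataset(data):
        print(len(predictions), 'poses in this frame')

cam.release()
```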
@@ -122,7 +120,7 @@ def webcam(args):
         key = cv2.waitKey(1)
         if key % 256 == 27:
             # ESC pressed
-            print("Escape hit, closing...")
+            LOG.info("Escape hit, closing...")
             break
 
         kk, dic_gt = factory_for_gt(pil_image.size, focal_length=args.focal)
@@ -140,11 +138,11 @@ def webcam(args):
             visualizer_mono = Visualizer(kk, args)(pil_image)  # create it with the first image
             visualizer_mono.send(None)
 
-        print(dic_out)
+        LOG.debug(dic_out)
         visualizer_mono.send((pil_image, dic_out, pifpaf_outs))
 
         end = time.time()
-        print("run-time: {:.2f} ms".format((end-start)*1000))
+        LOG.info("run-time: {:.2f} ms".format((end-start)*1000))
 
     cam.release()
 
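The remaining changes swap bare `print()` calls for the module logger, matching the "better logging" note in the commit message. A small sketch of the logger setup these hunks assume (the actual `LOG` definition is outside this diff):

```python
import logging

LOG = logging.getLogger(__name__)        # assumed module-level logger
logging.basicConfig(level=logging.INFO)  # example configuration

LOG.debug('resized image size: %s', (144, 256, 3))  # hidden unless DEBUG is enabled
LOG.info('run-time: %.2f ms', 12.3)                 # visible at the default INFO level
```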