activity experiment

2020-12-08 17:17:44 +01:00 · 2020-12-08 17:17:44 +01:00 · 98d1c29012
commit 98d1c29012
parent bf727c03c8
8 changed files with 15 additions and 255 deletions
--- a/monstereo/activity.py
+++ b/monstereo/activity.py
@@ -46,7 +46,9 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa
        # Samples distance
        dds = torch.tensor(dds).view(-1, 1)
        stds = torch.tensor(stds).view(-1, 1)
-        # stds = get_task_error(dds)  # similar results to MonoLoco but lower true positive
+        # stds_te = get_task_error(dds)  # similar results to MonoLoco but lower true positive
+        # print(f'ML : {float(torch.mean(stds))}\n')
+        # print(f'Task Error: {float(torch.mean(stds_te))}')
        laplace_d = torch.cat((dds, stds), dim=1)
        samples_d = laplace_sampling(laplace_d, n_samples=n_samples)

--- a/monstereo/eval/eval_activity.py
+++ b/monstereo/eval/eval_activity.py
@@ -24,6 +24,8 @@ class ActivityEvaluator:
    def __init__(self, args):

        self.dir_ann = args.dir_ann
+        assert self.dir_ann is not None and os.path.exists(self.dir_ann), \
+            "Annotation directory not provided / does not exist"
        assert os.listdir(self.dir_ann), "Annotation directory is empty"

        # COLLECTIVE ACTIVITY DATASET (talking)
@@ -32,7 +34,7 @@ class ActivityEvaluator:
            self.sequences = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36']
            # folders_collective = ['seq02']
            self.dir_data = 'data/activity/dataset'
-            self.THRESHOLD_PROB = 0.2  # Concordance for samples
+            self.THRESHOLD_PROB = 0.25  # Concordance for samples
            self.THRESHOLD_DIST = 2  # Threshold to check distance of people
            self.RADII = (0.3, 0.5)  # expected radii of the o-space
            self.PIFPAF_CONF = 0.3
@@ -95,6 +97,8 @@ class ActivityEvaluator:
                self.estimate_activity(dic_out, matches, ys_gt, categories=categories)

        # Print Results
+            acc = accuracy_score(self.all_gt[seq], self.all_pred[seq])
+            print(f"Accuracy of category {seq}: {100*acc:.2f}%")
        cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.sequences)

    def eval_kitti(self):
@@ -225,8 +229,9 @@ def cout_results(cnt, all_gt, all_pred, categories=()):

    # Final Accuracy
    acc = accuracy_score(all_gt['all'], all_pred['all'])
+    recall = cnt['pred']['all'] / cnt['gt']['all'] * 100  # only predictions that match a ground-truth are included
    print('-' * 80)
-    print("Final Accuracy: {:.2f}%".format(acc * 100))
+    print(f"Final Accuracy: {acc * 100:.2f}      Final Recall:{recall:.2f}")
    print('-' * 80)


--- a/monstereo/eval/eval_kitti.py
+++ b/monstereo/eval/eval_kitti.py
@@ -55,7 +55,7 @@ class EvalKitti:
                                for method in self.methods}

        # Set thresholds to obtain comparable recall
-        self.dic_thresh_conf['monopsr'] += 0.3
+        self.dic_thresh_conf['monopsr'] += 0.4
        self.dic_thresh_conf['e2e-pl'] = -100
        self.dic_thresh_conf['oc-stereo'] = -100
        self.dic_thresh_conf['smoke'] = -100
--- a/monstereo/eval/generate_kitti.py
+++ b/monstereo/eval/generate_kitti.py
@@ -240,8 +240,8 @@ def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco', ca
            if mode == 'monstereo':
                conf_scale = 0.03
            elif mode == 'monoloco_pp':
-                conf_scale = 0.033
-                # conf_scale = 0.036 nuScenes for having same recall
+                # conf_scale = 0.033
+                conf_scale = 0.035  # nuScenes for having same recall
            else:
                conf_scale = 0.055
            conf = conf_scale * (uv_box[-1]) / (bi / math.sqrt(xx ** 2 + yy ** 2 + zz ** 2))
--- a/monstereo/network/pifpaf.py
+++ b/monstereo/network/pifpaf.py
@@ -1,102 +0,0 @@
-
-import glob
-
-import numpy as np
-import torchvision
-import torch
-from PIL import Image, ImageFile
-from openpifpaf.network import nets
-from openpifpaf import decoder
-
-from .process import image_transform
-
-
-class ImageList(torch.utils.data.Dataset):
-    """It defines transformations to apply to images and outputs of the dataloader"""
-    def __init__(self, image_paths, scale):
-        self.image_paths = image_paths
-        self.image_paths.sort()
-        self.scale = scale
-
-    def __getitem__(self, index):
-        image_path = self.image_paths[index]
-        ImageFile.LOAD_TRUNCATED_IMAGES = True
-        with open(image_path, 'rb') as f:
-            image = Image.open(f).convert('RGB')
-
-        if self.scale > 1.01 or self.scale < 0.99:
-            image = torchvision.transforms.functional.resize(image,
-                                                             (round(self.scale * image.size[1]),
-                                                              round(self.scale * image.size[0])),
-                                                             interpolation=Image.BICUBIC)
-        # PIL images are not iterables
-        original_image = torchvision.transforms.functional.to_tensor(image)  # 0-255 --> 0-1
-        image = image_transform(image)
-
-        return image_path, original_image, image
-
-    def __len__(self):
-        return len(self.image_paths)
-
-
-def factory_from_args(args):
-
-    # Merge the model_pifpaf argument
-    if not args.checkpoint:
-        args.checkpoint = 'resnet152'  # Default model Resnet 152
-    # glob
-    if args.glob:
-        args.images += glob.glob(args.glob)
-    if not args.images:
-        raise Exception("no image files given")
-
-    # add args.device
-    args.device = torch.device('cpu')
-    args.pin_memory = False
-    if torch.cuda.is_available():
-        args.device = torch.device('cuda')
-        args.pin_memory = True
-
-    # Add num_workers
-    args.loader_workers = 8
-
-    # Add visualization defaults
-    args.figure_width = 10
-    args.dpi_factor = 1.0
-
-    return args
-
-
-class PifPaf:
-    def __init__(self, args):
-        """Instanciate the mdodel"""
-        factory_from_args(args)
-        model_pifpaf, _ = nets.factory_from_args(args)
-        model_pifpaf = model_pifpaf.to(args.device)
-        self.processor = decoder.factory_from_args(args, model_pifpaf)
-        self.keypoints_whole = []
-
-        # Scale the keypoints to the original image size for printing (if not webcam)
-        self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3)
-
-    def fields(self, processed_images):
-        """Encoder for pif and paf fields"""
-        fields_batch = self.processor.fields(processed_images)
-        return fields_batch
-
-    def forward(self, image, processed_image_cpu, fields):
-        """Decoder, from pif and paf fields to keypoints"""
-        self.processor.set_cpu_image(image, processed_image_cpu)
-        keypoint_sets, scores = self.processor.keypoint_sets(fields)
-
-        if keypoint_sets.size > 0:
-            self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1)
-                                        .reshape(keypoint_sets.shape[0], -1).tolist())
-
-        pifpaf_out = [
-            {'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(),
-             'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0],
-                      np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]}
-            for kps in keypoint_sets
-        ]
-        return keypoint_sets, scores, pifpaf_out
--- a/monstereo/network/process.py
+++ b/monstereo/network/process.py
@@ -82,7 +82,7 @@ def factory_for_gt(im_size, name=None, path_gt=None, verbose=True):
        dic_gt = None
        x_factor = im_size[0] / 1600
        y_factor = im_size[1] / 900
-        pixel_factor = (x_factor + y_factor) / 2  # 1.7 for MOT
+        pixel_factor = (x_factor + y_factor) / 1.75  # 1.7 for MOT
        # pixel_factor = 1
        if im_size[0] / im_size[1] > 2.5:
            kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]]  # Kitti calibration
--- a/monstereo/predict.py
+++ b/monstereo/predict.py
@@ -1,146 +0,0 @@
-
-# pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable
-
-import os
-import json
-from collections import defaultdict
-
-
-import torch
-from PIL import Image
-
-from .visuals.printer import Printer
-from .visuals.pifpaf_show import KeypointPainter, image_canvas
-from .network import PifPaf, ImageList, Loco
-from .network.process import factory_for_gt, preprocess_pifpaf
-
-
-def predict(args):
-
-    cnt = 0
-
-    # Load Models
-    pifpaf = PifPaf(args)
-    assert args.mode in ('mono', 'stereo', 'pifpaf')
-
-    if 'mono' in args.mode:
-        monoloco = Loco(model=args.model, net='monoloco_pp',
-                        device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    if 'stereo' in args.mode:
-        monstereo = Loco(model=args.model, net='monstereo',
-                         device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout)
-
-    # data
-    data = ImageList(args.images, scale=args.scale)
-    if args.mode == 'stereo':
-        assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
-        bs = 2
-    else:
-        bs = 1
-    data_loader = torch.utils.data.DataLoader(
-        data, batch_size=bs, shuffle=False,
-        pin_memory=args.pin_memory, num_workers=args.loader_workers)
-
-    for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader):
-        images = image_tensors.permute(0, 2, 3, 1)
-
-        processed_images = processed_images_cpu.to(args.device, non_blocking=True)
-        fields_batch = pifpaf.fields(processed_images)
-
-        # unbatch stereo pair
-        for ii, (image_path, image, processed_image_cpu, fields) in enumerate(zip(
-                image_paths, images, processed_images_cpu, fields_batch)):
-
-            if args.output_directory is None:
-                splits = os.path.split(image_paths[0])
-                output_path = os.path.join(splits[0], 'out_' + splits[1])
-            else:
-                file_name = os.path.basename(image_paths[0])
-                output_path = os.path.join(args.output_directory, 'out_' + file_name)
-            print('image', idx, image_path, output_path)
-            keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields)
-
-            if ii == 0:
-                pifpaf_outputs = [keypoint_sets, scores, pifpaf_out]  # keypoints_sets and scores for pifpaf printing
-                images_outputs = [image]  # List of 1 or 2 elements with pifpaf tensor and monoloco original image
-                pifpaf_outs = {'left': pifpaf_out}
-                image_path_l = image_path
-            else:
-                pifpaf_outs['right'] = pifpaf_out
-
-        if args.mode in ('stereo', 'mono'):
-            # Extract calibration matrix and ground truth file if present
-            with open(image_path_l, 'rb') as f:
-                pil_image = Image.open(f).convert('RGB')
-                images_outputs.append(pil_image)
-
-            im_name = os.path.basename(image_path_l)
-            im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale))  # Original
-            kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt)
-
-            # Preprocess pifpaf outputs and run monoloco
-            boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False)
-
-            if args.mode == 'mono':
-                print("Prediction with MonoLoco++")
-                dic_out = monoloco.forward(keypoints, kk)
-                dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-
-            else:
-                print("Prediction with MonStereo")
-                boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
-                dic_out = monstereo.forward(keypoints, kk, keypoints_r=keypoints_r)
-                dic_out = monstereo.post_process(dic_out, boxes, keypoints, kk, dic_gt)
-
-        else:
-            dic_out = defaultdict(list)
-            kk = None
-
-        factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk)
-        print('Image {}\n'.format(cnt) + '-' * 120)
-        cnt += 1
-
-
-def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None):
-    """Output json files or images according to the choice"""
-
-    # Save json file
-    if args.mode == 'pifpaf':
-        keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:]
-
-        # Visualizer
-        keypoint_painter = KeypointPainter(show_box=False)
-        skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4)
-
-        if 'json' in args.output_types and keypoint_sets.size > 0:
-            with open(output_path + '.pifpaf.json', 'w') as f:
-                json.dump(pifpaf_out, f)
-
-        if 'keypoints' in args.output_types:
-            with image_canvas(images_outputs[0],
-                              output_path + '.keypoints.png',
-                              show=args.show,
-                              fig_width=args.figure_width,
-                              dpi_factor=args.dpi_factor) as ax:
-                keypoint_painter.keypoints(ax, keypoint_sets)
-
-        if 'skeleton' in args.output_types:
-            with image_canvas(images_outputs[0],
-                              output_path + '.skeleton.png',
-                              show=args.show,
-                              fig_width=args.figure_width,
-                              dpi_factor=args.dpi_factor) as ax:
-                skeleton_painter.keypoints(ax, keypoint_sets, scores=scores)
-
-    else:
-        if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
-            print(output_path)
-            if dic_out['boxes']:  # Only print in case of detections
-                printer = Printer(images_outputs[1], output_path, kk, args)
-                figures, axes = printer.factory_axes()
-                printer.draw(figures, axes, dic_out, images_outputs[1])
-
-        if 'json' in args.output_types:
-            with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
-                json.dump(dic_out, ff)
--- a/monstereo/run.py
+++ b/monstereo/run.py
@@ -48,6 +48,7 @@ def cli():
    predict_parser.add_argument('--scale', default=1.0, type=float, help='change the scale of the image to preprocess')

    # Monoloco
+    predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo')
    predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True)
    predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512)
    predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',