diff --git a/monstereo/activity.py b/monstereo/activity.py index aec94ed..a106971 100644 --- a/monstereo/activity.py +++ b/monstereo/activity.py @@ -46,7 +46,9 @@ def social_interactions(idx, centers, angles, dds, stds=None, social_distance=Fa # Samples distance dds = torch.tensor(dds).view(-1, 1) stds = torch.tensor(stds).view(-1, 1) - # stds = get_task_error(dds) # similar results to MonoLoco but lower true positive + # stds_te = get_task_error(dds) # similar results to MonoLoco but lower true positive + # print(f'ML : {float(torch.mean(stds))}\n') + # print(f'Task Error: {float(torch.mean(stds_te))}') laplace_d = torch.cat((dds, stds), dim=1) samples_d = laplace_sampling(laplace_d, n_samples=n_samples) diff --git a/monstereo/eval/eval_activity.py b/monstereo/eval/eval_activity.py index b485cb9..55d01b4 100644 --- a/monstereo/eval/eval_activity.py +++ b/monstereo/eval/eval_activity.py @@ -24,6 +24,8 @@ class ActivityEvaluator: def __init__(self, args): self.dir_ann = args.dir_ann + assert self.dir_ann is not None and os.path.exists(self.dir_ann), \ + "Annotation directory not provided / does not exist" assert os.listdir(self.dir_ann), "Annotation directory is empty" # COLLECTIVE ACTIVITY DATASET (talking) @@ -32,7 +34,7 @@ class ActivityEvaluator: self.sequences = ['seq02', 'seq14', 'seq12', 'seq13', 'seq11', 'seq36'] # folders_collective = ['seq02'] self.dir_data = 'data/activity/dataset' - self.THRESHOLD_PROB = 0.2 # Concordance for samples + self.THRESHOLD_PROB = 0.25 # Concordance for samples self.THRESHOLD_DIST = 2 # Threshold to check distance of people self.RADII = (0.3, 0.5) # expected radii of the o-space self.PIFPAF_CONF = 0.3 @@ -95,6 +97,8 @@ class ActivityEvaluator: self.estimate_activity(dic_out, matches, ys_gt, categories=categories) # Print Results + acc = accuracy_score(self.all_gt[seq], self.all_pred[seq]) + print(f"Accuracy of category {seq}: {100*acc:.2f}%") cout_results(self.cnt, self.all_gt, self.all_pred, categories=self.sequences) def eval_kitti(self): @@ -225,8 +229,9 @@ def cout_results(cnt, all_gt, all_pred, categories=()): # Final Accuracy acc = accuracy_score(all_gt['all'], all_pred['all']) + recall = cnt['pred']['all'] / cnt['gt']['all'] * 100 # only predictions that match a ground-truth are included print('-' * 80) - print("Final Accuracy: {:.2f}%".format(acc * 100)) + print(f"Final Accuracy: {acc * 100:.2f} Final Recall:{recall:.2f}") print('-' * 80) diff --git a/monstereo/eval/eval_kitti.py b/monstereo/eval/eval_kitti.py index f8dc96a..9ad2710 100644 --- a/monstereo/eval/eval_kitti.py +++ b/monstereo/eval/eval_kitti.py @@ -55,7 +55,7 @@ class EvalKitti: for method in self.methods} # Set thresholds to obtain comparable recall - self.dic_thresh_conf['monopsr'] += 0.3 + self.dic_thresh_conf['monopsr'] += 0.4 self.dic_thresh_conf['e2e-pl'] = -100 self.dic_thresh_conf['oc-stereo'] = -100 self.dic_thresh_conf['smoke'] = -100 diff --git a/monstereo/eval/generate_kitti.py b/monstereo/eval/generate_kitti.py index 4be8618..0d7b1c4 100644 --- a/monstereo/eval/generate_kitti.py +++ b/monstereo/eval/generate_kitti.py @@ -240,8 +240,8 @@ def save_txts(path_txt, all_inputs, all_outputs, all_params, mode='monoloco', ca if mode == 'monstereo': conf_scale = 0.03 elif mode == 'monoloco_pp': - conf_scale = 0.033 - # conf_scale = 0.036 nuScenes for having same recall + # conf_scale = 0.033 + conf_scale = 0.035 # nuScenes for having same recall else: conf_scale = 0.055 conf = conf_scale * (uv_box[-1]) / (bi / math.sqrt(xx ** 2 + yy ** 2 + zz ** 2)) diff --git a/monstereo/network/pifpaf.py b/monstereo/network/pifpaf.py deleted file mode 100644 index 6209c04..0000000 --- a/monstereo/network/pifpaf.py +++ /dev/null @@ -1,102 +0,0 @@ - -import glob - -import numpy as np -import torchvision -import torch -from PIL import Image, ImageFile -from openpifpaf.network import nets -from openpifpaf import decoder - -from .process import image_transform - - -class ImageList(torch.utils.data.Dataset): - """It defines transformations to apply to images and outputs of the dataloader""" - def __init__(self, image_paths, scale): - self.image_paths = image_paths - self.image_paths.sort() - self.scale = scale - - def __getitem__(self, index): - image_path = self.image_paths[index] - ImageFile.LOAD_TRUNCATED_IMAGES = True - with open(image_path, 'rb') as f: - image = Image.open(f).convert('RGB') - - if self.scale > 1.01 or self.scale < 0.99: - image = torchvision.transforms.functional.resize(image, - (round(self.scale * image.size[1]), - round(self.scale * image.size[0])), - interpolation=Image.BICUBIC) - # PIL images are not iterables - original_image = torchvision.transforms.functional.to_tensor(image) # 0-255 --> 0-1 - image = image_transform(image) - - return image_path, original_image, image - - def __len__(self): - return len(self.image_paths) - - -def factory_from_args(args): - - # Merge the model_pifpaf argument - if not args.checkpoint: - args.checkpoint = 'resnet152' # Default model Resnet 152 - # glob - if args.glob: - args.images += glob.glob(args.glob) - if not args.images: - raise Exception("no image files given") - - # add args.device - args.device = torch.device('cpu') - args.pin_memory = False - if torch.cuda.is_available(): - args.device = torch.device('cuda') - args.pin_memory = True - - # Add num_workers - args.loader_workers = 8 - - # Add visualization defaults - args.figure_width = 10 - args.dpi_factor = 1.0 - - return args - - -class PifPaf: - def __init__(self, args): - """Instanciate the mdodel""" - factory_from_args(args) - model_pifpaf, _ = nets.factory_from_args(args) - model_pifpaf = model_pifpaf.to(args.device) - self.processor = decoder.factory_from_args(args, model_pifpaf) - self.keypoints_whole = [] - - # Scale the keypoints to the original image size for printing (if not webcam) - self.scale_np = np.array([args.scale, args.scale, 1] * 17).reshape(17, 3) - - def fields(self, processed_images): - """Encoder for pif and paf fields""" - fields_batch = self.processor.fields(processed_images) - return fields_batch - - def forward(self, image, processed_image_cpu, fields): - """Decoder, from pif and paf fields to keypoints""" - self.processor.set_cpu_image(image, processed_image_cpu) - keypoint_sets, scores = self.processor.keypoint_sets(fields) - - if keypoint_sets.size > 0: - self.keypoints_whole.append(np.around((keypoint_sets / self.scale_np), 1) - .reshape(keypoint_sets.shape[0], -1).tolist()) - - pifpaf_out = [ - {'keypoints': np.around(kps / self.scale_np, 1).reshape(-1).tolist(), - 'bbox': [np.min(kps[:, 0]) / self.scale_np[0, 0], np.min(kps[:, 1]) / self.scale_np[0, 0], - np.max(kps[:, 0]) / self.scale_np[0, 0], np.max(kps[:, 1]) / self.scale_np[0, 0]]} - for kps in keypoint_sets - ] - return keypoint_sets, scores, pifpaf_out diff --git a/monstereo/network/process.py b/monstereo/network/process.py index d1a4760..1a9c5ea 100644 --- a/monstereo/network/process.py +++ b/monstereo/network/process.py @@ -82,7 +82,7 @@ def factory_for_gt(im_size, name=None, path_gt=None, verbose=True): dic_gt = None x_factor = im_size[0] / 1600 y_factor = im_size[1] / 900 - pixel_factor = (x_factor + y_factor) / 2 # 1.7 for MOT + pixel_factor = (x_factor + y_factor) / 1.75 # 1.7 for MOT # pixel_factor = 1 if im_size[0] / im_size[1] > 2.5: kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration diff --git a/monstereo/predict.py b/monstereo/predict.py deleted file mode 100644 index d869386..0000000 --- a/monstereo/predict.py +++ /dev/null @@ -1,146 +0,0 @@ - -# pylint: disable=too-many-statements, too-many-branches, undefined-loop-variable - -import os -import json -from collections import defaultdict - - -import torch -from PIL import Image - -from .visuals.printer import Printer -from .visuals.pifpaf_show import KeypointPainter, image_canvas -from .network import PifPaf, ImageList, Loco -from .network.process import factory_for_gt, preprocess_pifpaf - - -def predict(args): - - cnt = 0 - - # Load Models - pifpaf = PifPaf(args) - assert args.mode in ('mono', 'stereo', 'pifpaf') - - if 'mono' in args.mode: - monoloco = Loco(model=args.model, net='monoloco_pp', - device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout) - - if 'stereo' in args.mode: - monstereo = Loco(model=args.model, net='monstereo', - device=args.device, n_dropout=args.n_dropout, p_dropout=args.dropout) - - # data - data = ImageList(args.images, scale=args.scale) - if args.mode == 'stereo': - assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting" - bs = 2 - else: - bs = 1 - data_loader = torch.utils.data.DataLoader( - data, batch_size=bs, shuffle=False, - pin_memory=args.pin_memory, num_workers=args.loader_workers) - - for idx, (image_paths, image_tensors, processed_images_cpu) in enumerate(data_loader): - images = image_tensors.permute(0, 2, 3, 1) - - processed_images = processed_images_cpu.to(args.device, non_blocking=True) - fields_batch = pifpaf.fields(processed_images) - - # unbatch stereo pair - for ii, (image_path, image, processed_image_cpu, fields) in enumerate(zip( - image_paths, images, processed_images_cpu, fields_batch)): - - if args.output_directory is None: - splits = os.path.split(image_paths[0]) - output_path = os.path.join(splits[0], 'out_' + splits[1]) - else: - file_name = os.path.basename(image_paths[0]) - output_path = os.path.join(args.output_directory, 'out_' + file_name) - print('image', idx, image_path, output_path) - keypoint_sets, scores, pifpaf_out = pifpaf.forward(image, processed_image_cpu, fields) - - if ii == 0: - pifpaf_outputs = [keypoint_sets, scores, pifpaf_out] # keypoints_sets and scores for pifpaf printing - images_outputs = [image] # List of 1 or 2 elements with pifpaf tensor and monoloco original image - pifpaf_outs = {'left': pifpaf_out} - image_path_l = image_path - else: - pifpaf_outs['right'] = pifpaf_out - - if args.mode in ('stereo', 'mono'): - # Extract calibration matrix and ground truth file if present - with open(image_path_l, 'rb') as f: - pil_image = Image.open(f).convert('RGB') - images_outputs.append(pil_image) - - im_name = os.path.basename(image_path_l) - im_size = (float(image.size()[1] / args.scale), float(image.size()[0] / args.scale)) # Original - kk, dic_gt = factory_for_gt(im_size, name=im_name, path_gt=args.path_gt) - - # Preprocess pifpaf outputs and run monoloco - boxes, keypoints = preprocess_pifpaf(pifpaf_outs['left'], im_size, enlarge_boxes=False) - - if args.mode == 'mono': - print("Prediction with MonoLoco++") - dic_out = monoloco.forward(keypoints, kk) - dic_out = monoloco.post_process(dic_out, boxes, keypoints, kk, dic_gt) - - else: - print("Prediction with MonStereo") - boxes_r, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size) - dic_out = monstereo.forward(keypoints, kk, keypoints_r=keypoints_r) - dic_out = monstereo.post_process(dic_out, boxes, keypoints, kk, dic_gt) - - else: - dic_out = defaultdict(list) - kk = None - - factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=dic_out, kk=kk) - print('Image {}\n'.format(cnt) + '-' * 120) - cnt += 1 - - -def factory_outputs(args, images_outputs, output_path, pifpaf_outputs, dic_out=None, kk=None): - """Output json files or images according to the choice""" - - # Save json file - if args.mode == 'pifpaf': - keypoint_sets, scores, pifpaf_out = pifpaf_outputs[:] - - # Visualizer - keypoint_painter = KeypointPainter(show_box=False) - skeleton_painter = KeypointPainter(show_box=False, color_connections=True, markersize=1, linewidth=4) - - if 'json' in args.output_types and keypoint_sets.size > 0: - with open(output_path + '.pifpaf.json', 'w') as f: - json.dump(pifpaf_out, f) - - if 'keypoints' in args.output_types: - with image_canvas(images_outputs[0], - output_path + '.keypoints.png', - show=args.show, - fig_width=args.figure_width, - dpi_factor=args.dpi_factor) as ax: - keypoint_painter.keypoints(ax, keypoint_sets) - - if 'skeleton' in args.output_types: - with image_canvas(images_outputs[0], - output_path + '.skeleton.png', - show=args.show, - fig_width=args.figure_width, - dpi_factor=args.dpi_factor) as ax: - skeleton_painter.keypoints(ax, keypoint_sets, scores=scores) - - else: - if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])): - print(output_path) - if dic_out['boxes']: # Only print in case of detections - printer = Printer(images_outputs[1], output_path, kk, args) - figures, axes = printer.factory_axes() - printer.draw(figures, axes, dic_out, images_outputs[1]) - - if 'json' in args.output_types: - with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff: - json.dump(dic_out, ff) diff --git a/monstereo/run.py b/monstereo/run.py index 81fe0f5..535cd9c 100644 --- a/monstereo/run.py +++ b/monstereo/run.py @@ -48,6 +48,7 @@ def cli(): predict_parser.add_argument('--scale', default=1.0, type=float, help='change the scale of the image to preprocess') # Monoloco + predict_parser.add_argument('--net', help='Choose network: monoloco, monoloco_p, monoloco_pp, monstereo') predict_parser.add_argument('--model', help='path of MonoLoco model to load', required=True) predict_parser.add_argument('--hidden_size', type=int, help='Number of hidden units in the model', default=512) predict_parser.add_argument('--path_gt', help='path of json file with gt 3d localization',