monoloco/monstereo/eval/eval_kitti.py
"""
Evaluate MonStereo code on the KITTI dataset using the ALP and ALE metrics
"""
# pylint: disable=attribute-defined-outside-init
import os
import math
import logging
import datetime
from collections import defaultdict

import numpy as np
from tabulate import tabulate

from ..utils import get_iou_matches, get_task_error, get_pixel_error, check_conditions, \
    get_difficulty, split_training, parse_ground_truth, get_iou_matches_matrix
from ..visuals import show_results, show_spread, show_task_error, show_box_plot


class EvalKitti:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
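    # The first four clusters follow the KITTI difficulty splits; the numeric
    # entries are distance-bin edges in meters: a ground truth with
    # clst < d <= next_clst falls into cluster clst (see find_cluster below)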
CLUSTERS = ('easy', 'moderate', 'hard', 'all', '3', '5', '7', '9', '11', '13', '15', '17', '19', '21', '23', '25',
'27', '29', '31', '49')
ALP_THRESHOLDS = ('<0.5m', '<1m', '<2m')
OUR_METHODS = ['geometric', 'monoloco', 'monoloco_pp', 'pose', 'reid', 'monstereo']
METHODS_MONO = ['m3d', 'monopsr']
METHODS_STEREO = ['3dop', 'psf', 'pseudo-lidar', 'e2e', 'oc-stereo']
BASELINES = ['task_error', 'pixel_error']
    HEADERS = ('method', '<0.5m', '<1m', '<2m', 'easy', 'moderate', 'hard', 'all')
    CATEGORIES = ('pedestrian',)

    def __init__(self, thresh_iou_monoloco=0.3, thresh_iou_base=0.3, thresh_conf_monoloco=0.2, thresh_conf_base=0.5,
                 verbose=False):
self.main_dir = os.path.join('data', 'kitti')
self.dir_gt = os.path.join(self.main_dir, 'gt')
self.methods = self.OUR_METHODS + self.METHODS_MONO + self.METHODS_STEREO
path_train = os.path.join('splits', 'kitti_train.txt')
path_val = os.path.join('splits', 'kitti_val.txt')
dir_logs = os.path.join('data', 'logs')
        assert os.path.exists(dir_logs), "No directory to save final statistics"
now = datetime.datetime.now()
now_time = now.strftime("%Y%m%d-%H%M")[2:]
self.path_results = os.path.join(dir_logs, 'eval-' + now_time + '.json')
self.verbose = verbose
self.dic_thresh_iou = {method: (thresh_iou_monoloco if method in self.OUR_METHODS
else thresh_iou_base)
for method in self.methods}
self.dic_thresh_conf = {method: (thresh_conf_monoloco if method in self.OUR_METHODS
else thresh_conf_base)
for method in self.methods}
self.dic_thresh_conf['monopsr'] += 0.3
        self.dic_thresh_conf['e2e'] = -100  # e2e and oc-stereo do not provide enough detections
        self.dic_thresh_conf['oc-stereo'] = -100
# Extract validation images for evaluation
names_gt = tuple(os.listdir(self.dir_gt))
_, self.set_val = split_training(names_gt, path_train, path_val)
# self.set_val = ('002282.txt', )
# Define variables to save statistics
        self.dic_methods = self.errors = self.dic_stds = self.dic_stats = None
        self.dic_cnt = self.cnt_gt = self.category = None
        self.cnt = 0

    def run(self):
"""Evaluate Monoloco performances on ALP and ALE metrics"""
for self.category in self.CATEGORIES:
# Initialize variables
self.errors = defaultdict(lambda: defaultdict(list))
self.dic_stds = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
self.dic_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float))))
self.dic_cnt = defaultdict(int)
self.cnt_gt = defaultdict(int)
# Iterate over each ground truth file in the training set
# self.set_val = ('000063.txt',)
for name in self.set_val:
path_gt = os.path.join(self.dir_gt, name)
self.name = name
# Iterate over each line of the gt file and save box location and distances
out_gt = parse_ground_truth(path_gt, self.category)
methods_out = defaultdict(tuple) # Save all methods for comparison
# Count ground_truth:
boxes_gt, ys, truncs_gt, occs_gt = out_gt
for idx, box in enumerate(boxes_gt):
mode = get_difficulty(box, truncs_gt[idx], occs_gt[idx])
self.cnt_gt[mode] += 1
self.cnt_gt['all'] += 1
            if boxes_gt:
for method in self.methods:
# Extract annotations
dir_method = os.path.join(self.main_dir, method)
                        assert os.path.exists(dir_method), "directory of the method %s does not exist" % method
path_method = os.path.join(dir_method, name)
methods_out[method] = self._parse_txts(path_method, method=method)
# Compute the error with ground truth
self._estimate_error(out_gt, methods_out[method], method=method)
# Update statistics of errors and uncertainty
for key in self.errors:
add_true_negatives(self.errors[key], self.cnt_gt['all'])
for clst in self.CLUSTERS[:-1]:
try:
get_statistics(self.dic_stats['test'][key][clst],
self.errors[key][clst],
self.dic_stds[key][clst], key)
except ZeroDivisionError:
print('\n'+'-'*100 + '\n'+f'ERROR: method {key} at cluster {clst} is empty' + '\n'+'-'*100+'\n')
raise
# Show statistics
print('\n' + self.category.upper() + ':')
            self.show_statistics()

    def printer(self, show, save):
if save or show:
show_results(self.dic_stats, self.CLUSTERS, show, save)
show_spread(self.dic_stats, self.CLUSTERS, show, save)
show_box_plot(self.errors, self.CLUSTERS, show, save)
            show_task_error(show, save)

    def _parse_txts(self, path, method):
boxes = []
dds = []
cat = []
if method == 'psf':
path = os.path.splitext(path)[0] + '.png.txt'
if method in self.OUR_METHODS:
bis, epis = [], []
output = (boxes, dds, cat, bis, epis)
else:
output = (boxes, dds, cat)
try:
with open(path, "r") as ff:
for line_str in ff:
if method == 'psf':
line = line_str.split(", ")
box = [float(x) for x in line[4:8]]
boxes.append(box)
                        loc = [float(x) for x in line[11:14]]
dd = math.sqrt(loc[0] ** 2 + loc[1] ** 2 + loc[2] ** 2)
dds.append(dd)
cat.append('Pedestrian')
else:
line = line_str.split()
if check_conditions(line,
category='pedestrian',
method=method,
thresh=self.dic_thresh_conf[method]):
box = [float(x) for x in line[4:8]]
box.append(float(line[15])) # Add confidence
                            loc = [float(x) for x in line[11:14]]
dd = math.sqrt(loc[0] ** 2 + loc[1] ** 2 + loc[2] ** 2)
cat.append(line[0])
boxes.append(box)
dds.append(dd)
if method in self.OUR_METHODS:
bis.append(float(line[16]))
epis.append(float(line[17]))
self.dic_cnt[method] += 1
return output
except FileNotFoundError:
            return output

    def _estimate_error(self, out_gt, out, method):
"""Estimate localization error"""
boxes_gt, ys, truncs_gt, occs_gt = out_gt
if method in self.OUR_METHODS:
boxes, dds, cat, bis, epis = out
else:
boxes, dds, cat = out
if method == 'psf':
matches = get_iou_matches_matrix(boxes, boxes_gt, self.dic_thresh_iou[method])
else:
matches = get_iou_matches(boxes, boxes_gt, self.dic_thresh_iou[method])
for (idx, idx_gt) in matches:
# Update error if match is found
dd_gt = ys[idx_gt][3]
zz_gt = ys[idx_gt][2]
mode = get_difficulty(boxes_gt[idx_gt], truncs_gt[idx_gt], occs_gt[idx_gt])
if cat[idx].lower() in (self.category, 'pedestrian'):
self.update_errors(dds[idx], dd_gt, mode, self.errors[method])
if method == 'monoloco':
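                    # Distance baselines derived from the ground truth: inflate the gt
                    # distance by the (squared) task error and by the pixel error at depth zz_gt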
dd_task_error = dd_gt + (get_task_error(zz_gt))**2
dd_pixel_error = dd_gt + get_pixel_error(zz_gt)
self.update_errors(dd_task_error, dd_gt, mode, self.errors['task_error'])
self.update_errors(dd_pixel_error, dd_gt, mode, self.errors['pixel_error'])
if method in self.OUR_METHODS:
epi = max(epis[idx], bis[idx])
                self.update_uncertainty(bis[idx], epi, dds[idx], dd_gt, mode, self.dic_stds[method])

    def update_errors(self, dd, dd_gt, cat, errors):
        """Compute and save the errors between a single box and its matched ground-truth box"""
diff = abs(dd - dd_gt)
clst = find_cluster(dd_gt, self.CLUSTERS[4:])
errors['all'].append(diff)
errors[cat].append(diff)
errors[clst].append(diff)
        # ALP: flag whether the error is below each of the 0.5 m / 1 m / 2 m thresholds
        for thresh, key in ((0.5, '<0.5m'), (1, '<1m'), (2, '<2m')):
            errors[key].append(1 if diff <= thresh else 0)

    def update_uncertainty(self, std_ale, std_epi, dd, dd_gt, mode, dic_stds):
        clst = find_cluster(dd_gt, self.CLUSTERS[4:])
        keys = ('all', clst, mode)

        # Aleatoric, epistemic and relative epistemic uncertainties
        for key in keys:
            dic_stds[key]['ale'].append(std_ale)
            dic_stds[key]['epi'].append(std_epi)
            dic_stds[key]['epi_rel'].append(std_epi / dd)

        # Number of annotations inside the confidence interval
        std = std_epi if std_epi > 0 else std_ale  # fall back on the aleatoric term when the epistemic one is missing
        inside = 1 if abs(dd - dd_gt) <= std else 0
        for key in keys:
            dic_stds[key]['interval'].append(inside)

        # Annotations at risk (the person is at least as close as predicted) and,
        # among those, the ones still inside the epistemic confidence interval
        if dd_gt <= dd:
            at_risk_interval = 1 if abs(dd - dd_gt) <= std_epi else 0
            for key in keys:
                dic_stds[key]['at_risk'].append(1)
                dic_stds[key]['at_risk-interval'].append(at_risk_interval)
        else:
            for key in keys:
                dic_stds[key]['at_risk'].append(0)

        # Precision of the uncertainty: prec_1 measures the localization error in
        # units of the predicted interval, prec_2 the gap between the predicted
        # interval and the task error at that distance
        eps = 1e-4
        task_error = get_task_error(dd)
        prec_1 = abs(dd - dd_gt) / (std_epi + eps)
        prec_2 = abs(std_epi - task_error)
        for key in keys:
            dic_stds[key]['prec_1'].append(prec_1)
            dic_stds[key]['prec_2'].append(prec_2)

    def show_statistics(self):
all_methods = self.methods + self.BASELINES
print('-'*90)
self.summary_table(all_methods)
# Uncertainty
for net in ('monoloco_pp', 'monstereo'):
print(('-'*100))
print(net.upper())
for clst in ('easy', 'moderate', 'hard', 'all'):
print(" Annotations in clst {}: {:.0f}, Recall: {:.1f}. Precision: {:.2f}, Relative size is {:.1f} %"
.format(clst,
self.dic_stats['test'][net][clst]['cnt'],
self.dic_stats['test'][net][clst]['interval']*100,
self.dic_stats['test'][net][clst]['prec_1'],
self.dic_stats['test'][net][clst]['epi_rel']*100))
if self.verbose:
for key in all_methods:
print(key.upper())
for clst in self.CLUSTERS[:4]:
print(" {} Average error in cluster {}: {:.2f} with a max error of {:.1f}, "
"for {} annotations"
.format(key, clst, self.dic_stats['test'][key][clst]['mean'],
self.dic_stats['test'][key][clst]['max'],
self.dic_stats['test'][key][clst]['cnt']))
for perc in self.ALP_THRESHOLDS:
print("{} Instances with error {}: {:.2f} %"
.format(key, perc, 100 * average(self.errors[key][perc])))
print("\nMatched annotations: {:.1f} %".format(self.errors[key]['matched']))
print(" Detected annotations : {}/{} ".format(self.dic_cnt[key], self.cnt_gt['all']))
print("-" * 100)
print("precision 1: {:.2f}".format(self.dic_stats['test']['monoloco']['all']['prec_1']))
print("precision 2: {:.2f}".format(self.dic_stats['test']['monoloco']['all']['prec_2']))

    def summary_table(self, all_methods):
"""Tabulate table for ALP and ALE metrics"""
alp = [[str(100 * average(self.errors[key][perc]))[:5]
for perc in ['<0.5m', '<1m', '<2m']]
for key in all_methods]
ale = [[str(round(self.dic_stats['test'][key][clst]['mean'], 2))[:4] + ' [' +
str(round(self.dic_stats['test'][key][clst]['cnt'] / self.cnt_gt[clst] * 100))[:2] + '%]'
for clst in self.CLUSTERS[:4]]
for key in all_methods]
results = [[key] + alp[idx] + ale[idx] for idx, key in enumerate(all_methods)]
print(tabulate(results, headers=self.HEADERS))
        print('-' * 90 + '\n')

    def stats_height(self):
        heights = []
        for name in self.set_val:
            path_gt = os.path.join(self.dir_gt, name)
            # Iterate over each line of the gt file and collect the pedestrian heights
            _, ys, _, _ = parse_ground_truth(path_gt, 'pedestrian')
            for label in ys:
                heights.append(label[4])
        tail1, tail2 = np.nanpercentile(np.array(heights), [5, 95])
        print(average(heights))
        print(len(heights))
        print(tail1, tail2)


def get_statistics(dic_stats, errors, dic_stds, key):
"""Update statistics of a cluster"""
try:
dic_stats['mean'] = average(errors)
dic_stats['max'] = max(errors)
dic_stats['cnt'] = len(errors)
    except ValueError:
        dic_stats['mean'] = -1
        dic_stats['max'] = -1
        dic_stats['cnt'] = -1
if key in ('monoloco', 'monoloco_pp', 'monstereo'):
dic_stats['std_ale'] = average(dic_stds['ale'])
dic_stats['std_epi'] = average(dic_stds['epi'])
dic_stats['epi_rel'] = average(dic_stds['epi_rel'])
dic_stats['interval'] = average(dic_stds['interval'])
dic_stats['at_risk'] = average(dic_stds['at_risk'])
dic_stats['prec_1'] = average(dic_stds['prec_1'])
        dic_stats['prec_2'] = average(dic_stds['prec_2'])


def add_true_negatives(err, cnt_gt):
"""Update errors statistics of a specific method with missing detections"""
matched = len(err['all'])
missed = cnt_gt - matched
zeros = [0] * missed
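    # A missed ground truth counts as a failure at every ALP threshold, while the
    # raw error lists ('all' and the distance clusters) only keep matched pairs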
err['<0.5m'].extend(zeros)
err['<1m'].extend(zeros)
err['<2m'].extend(zeros)
    err['matched'] = 100 * matched / cnt_gt


def find_cluster(dd, clusters):
    """Find the correct cluster. Distances above the last cluster edge go into
    'excluded', together with the filtered-out KITTI categories"""
for idx, clst in enumerate(clusters[:-1]):
if int(clst) < dd <= int(clusters[idx+1]):
return clst
return 'excluded'
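# For instance, find_cluster(5.4, ('3', '5', '7')) returns '5', since 5 < 5.4 <= 7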


def extract_indices(idx_to_check, *args):
    """
    Check whether a given ground-truth index idx_to_check appears in every series of
    (idx_pred, idx_gt) matches, and return the corresponding predicted indices.
    idx_to_check --> ground-truth index to look for in the matches of every method
    idx_pred --> index of the predicted box of a method
    idx_gt --> ground-truth index matched by that prediction
    indices --> list of predicted indices, one per method, corresponding to idx_to_check
    """
checks = [False]*len(args)
indices = []
for idx_method, method in enumerate(args):
for (idx_pred, idx_gt) in method:
if idx_gt == idx_to_check:
checks[idx_method] = True
indices.append(idx_pred)
return all(checks), indices
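# For instance, with matches_a = [(0, 2)] and matches_b = [(5, 2)],
# extract_indices(2, matches_a, matches_b) returns (True, [0, 5])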


def average(my_list):
    """Calculate the mean of a list (raises ZeroDivisionError on an empty list)"""
    return sum(my_list) / len(my_list)