From 0333295edbdb687273d5a8e19bdf231babcfd7bc Mon Sep 17 00:00:00 2001
From: Charles Joseph Pierre Beauville
Date: Sun, 27 Jun 2021 23:55:03 +0200
Subject: [PATCH] Removed redundancy

---
 monoloco/run.py                         |   8 +-
 monoloco/train/__init__.py              |   1 -
 monoloco/train/trainer_casr.py          |   6 +-
 monoloco/train/trainer_casr_standard.py | 363 ------------------------
 4 files changed, 7 insertions(+), 371 deletions(-)
 delete mode 100644 monoloco/train/trainer_casr_standard.py

diff --git a/monoloco/run.py b/monoloco/run.py
index 1a1febe..c784064 100644
--- a/monoloco/run.py
+++ b/monoloco/run.py
@@ -100,7 +100,8 @@ def cli():
     training_parser.add_argument('--n_stage', type=int, help='Number of stages in the model', default=3)
     training_parser.add_argument('--hyp', help='run hyperparameters tuning', action='store_true')
     training_parser.add_argument('--casr', help='run casr training', action='store_true')
-    training_parser.add_argument('--casr_std', help='run casr training', action='store_true')
+    training_parser.add_argument('--std', help='run casr training with only standard gestures',
+                                 action='store_true')
     training_parser.add_argument('--multiplier', type=int, help='Size of the grid of hyp search', default=1)
     training_parser.add_argument('--r_seed', type=int, help='specify the seed for training and hyp tuning', default=1)
     training_parser.add_argument('--print_loss', help='print training and validation losses', action='store_true')
@@ -184,11 +185,6 @@ def main():
         training = CASRTrainer(args)
         _ = training.train()
         _ = training.evaluate()
-    elif args.casr_std:
-        from .train import CASRTrainerStandard
-        training = CASRTrainerStandard(args)
-        _ = training.train()
-        _ = training.evaluate()
     else:
         from .train import Trainer
         training = Trainer(args)
diff --git a/monoloco/train/__init__.py b/monoloco/train/__init__.py
index bbc7aa3..5d48a38 100644
--- a/monoloco/train/__init__.py
+++ b/monoloco/train/__init__.py
@@ -3,4 +3,3 @@ from .hyp_tuning import HypTuning
 from .hyp_tuning_casr import HypTuningCasr
 from .trainer import Trainer
 from .trainer_casr import CASRTrainer
-from .trainer_casr_standard import CASRTrainerStandard
diff --git a/monoloco/train/trainer_casr.py b/monoloco/train/trainer_casr.py
index 4201444..6012635 100644
--- a/monoloco/train/trainer_casr.py
+++ b/monoloco/train/trainer_casr.py
@@ -60,13 +60,17 @@ class CASRTrainer:
         self.r_seed = args.r_seed
         self.auto_tune_mtl = args.auto_tune_mtl
 
+        if args.std:
+            self.output_size = 3
+            name = 'casr_standard'
+        else:
+            name = 'casr'
         # Select path out
         if args.out:
             self.path_out = args.out  # full path without extension
             dir_out, _ = os.path.split(self.path_out)
         else:
             dir_out = os.path.join('data', 'outputs')
-            name = 'casr'
             now = datetime.datetime.now()
             now_time = now.strftime("%Y%m%d-%H%M")[2:]
             name_out = name + '-' + now_time + '.pkl'
diff --git a/monoloco/train/trainer_casr_standard.py b/monoloco/train/trainer_casr_standard.py
deleted file mode 100644
index f57b4cd..0000000
--- a/monoloco/train/trainer_casr_standard.py
+++ /dev/null
@@ -1,363 +0,0 @@
-# pylint: disable=too-many-statements
-
-"""
-Training and evaluation of a neural network that, given 2D joints, estimates:
-- 3D localization and confidence intervals
-- Orientation
-- Bounding box dimensions
-"""
-
-import copy
-import os
-import datetime
-import logging
-from collections import defaultdict
-import sys
-import time
-from itertools import chain
-
-import matplotlib.pyplot as plt
-import torch
-from torch.utils.data import DataLoader
-from torch.optim import lr_scheduler
-
-from .. import __version__
-from .datasets import KeypointsDataset
-from .losses import CompositeLoss, MultiTaskLoss, AutoTuneMultiTaskLoss
-from ..network import extract_outputs, extract_labels
-from ..network.architectures import LocoModel
-from ..utils import set_logger
-
-
-class CASRTrainerStandard:
-    # Constants
-    VAL_BS = 10000
-
-    tasks = ('cyclist',)
-    val_task = 'cyclist'
-    lambdas = (1,)
-    #clusters = ['10', '20', '30', '40']
-    input_size = 34
-    output_size = 3
-    dir_figures = os.path.join('figures', 'losses')
-
-    def __init__(self, args):
-        """
-        Initialize directories, load the data and parameters for the training
-        """
-
-        assert os.path.exists(args.joints), "Input file not found"
-        self.mode = args.mode
-        self.joints = args.joints
-        self.num_epochs = args.epochs
-        self.no_save = args.no_save
-        self.print_loss = args.print_loss
-        self.lr = args.lr
-        self.sched_step = args.sched_step
-        self.sched_gamma = args.sched_gamma
-        self.hidden_size = args.hidden_size
-        self.n_stage = args.n_stage
-        self.r_seed = args.r_seed
-        self.auto_tune_mtl = args.auto_tune_mtl
-
-        # Select path out
-        if args.out:
-            self.path_out = args.out  # full path without extension
-            dir_out, _ = os.path.split(self.path_out)
-        else:
-            dir_out = os.path.join('data', 'outputs')
-            name = 'casr_standard'
-            now = datetime.datetime.now()
-            now_time = now.strftime("%Y%m%d-%H%M")[2:]
-            name_out = name + '-' + now_time + '.pkl'
-            self.path_out = os.path.join(dir_out, name_out)
-        assert os.path.exists(dir_out), "Directory to save the model not found"
-        print(self.path_out)
-        # Select the device
-        use_cuda = torch.cuda.is_available()
-        self.device = torch.device("cuda" if use_cuda else "cpu")
-        print('Device: ', self.device)
-        torch.manual_seed(self.r_seed)
-        if use_cuda:
-            torch.cuda.manual_seed(self.r_seed)
-
-        losses_tr, losses_val = CompositeLoss(self.tasks)()
-
-        if self.auto_tune_mtl:
-            self.mt_loss = AutoTuneMultiTaskLoss(losses_tr, losses_val, self.lambdas, self.tasks)
-        else:
-            self.mt_loss = MultiTaskLoss(losses_tr, losses_val, self.lambdas, self.tasks)
-        self.mt_loss.to(self.device)
-
-        # Dataloader
-        self.dataloaders = {phase: DataLoader(KeypointsDataset(self.joints, phase=phase),
-                                              batch_size=args.bs, shuffle=True) for phase in ['train', 'val']}
-
-        self.dataset_sizes = {phase: len(KeypointsDataset(self.joints, phase=phase))
-                              for phase in ['train', 'val']}
-        self.dataset_version = KeypointsDataset(self.joints, phase='train').get_version()
-
-        self._set_logger(args)
-
-        # Define the model
-        self.logger.info('Sizes of the dataset: {}'.format(self.dataset_sizes))
-        print(">>> creating model")
-
-        self.model = LocoModel(
-            input_size=self.input_size,
-            output_size=self.output_size,
-            linear_size=args.hidden_size,
-            p_dropout=args.dropout,
-            num_stage=self.n_stage,
-            device=self.device,
-        )
-        self.model.to(self.device)
-        print(">>> model params: {:.3f}M".format(sum(p.numel() for p in self.model.parameters()) / 1000000.0))
-        print(">>> loss params: {}".format(sum(p.numel() for p in self.mt_loss.parameters())))
-
-        # Optimizer and scheduler
-        all_params = chain(self.model.parameters(), self.mt_loss.parameters())
-        self.optimizer = torch.optim.Adam(params=all_params, lr=args.lr)
-        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')
-        self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=self.sched_step, gamma=self.sched_gamma)
-
-    def train(self):
-        since = time.time()
-        best_model_wts = copy.deepcopy(self.model.state_dict())
-        best_acc = 1e6
-        best_training_acc = 1e6
-        best_epoch = 0
-        epoch_losses = defaultdict(lambda: defaultdict(list))
-        for epoch in range(self.num_epochs):
-            running_loss = defaultdict(lambda: defaultdict(int))
-
-            # Each epoch has a training and validation phase
-            for phase in ['train', 'val']:
-                if phase == 'train':
-                    self.model.train()  # Set model to training mode
-                else:
-                    self.model.eval()  # Set model to evaluate mode
-
-                for inputs, labels, _, _ in self.dataloaders[phase]:
-                    inputs = inputs.to(self.device)
-                    labels = labels.to(self.device)
-                    with torch.set_grad_enabled(phase == 'train'):
-                        if phase == 'train':
-                            self.optimizer.zero_grad()
-                            outputs = self.model(inputs)
-                            loss, _ = self.mt_loss(outputs, labels, phase=phase)
-                            loss.backward()
-                            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 3)
-                            self.optimizer.step()
-                            self.scheduler.step()
-
-                        else:
-                            outputs = self.model(inputs)
-                        with torch.no_grad():
-                            loss_eval, loss_values_eval = self.mt_loss(outputs, labels, phase='val')
-                            self.epoch_logs(phase, loss_eval, loss_values_eval, inputs, running_loss)
-
-            self.cout_values(epoch, epoch_losses, running_loss)
-
-            # deep copy the model
-
-            if epoch_losses['val'][self.val_task][-1] < best_acc:
-                best_acc = epoch_losses['val'][self.val_task][-1]
-                best_training_acc = epoch_losses['train']['all'][-1]
-                best_epoch = epoch
-                best_model_wts = copy.deepcopy(self.model.state_dict())
-
-        time_elapsed = time.time() - since
-        print('\n\n' + '-' * 120)
-        self.logger.info('Training:\nTraining complete in {:.0f}m {:.0f}s'
-                         .format(time_elapsed // 60, time_elapsed % 60))
-        self.logger.info('Best training Accuracy: {:.3f}'.format(best_training_acc))
-        self.logger.info('Best validation Accuracy for {}: {:.3f}'.format(self.val_task, best_acc))
-        self.logger.info('Saved weights of the model at epoch: {}'.format(best_epoch))
-
-        self._print_losses(epoch_losses)
-
-        # load best model weights
-        self.model.load_state_dict(best_model_wts)
-        return best_epoch
-
-    def epoch_logs(self, phase, loss, loss_values, inputs, running_loss):
-
-        running_loss[phase]['all'] += loss.item() * inputs.size(0)
-        for i, task in enumerate(self.tasks):
-            running_loss[phase][task] += loss_values[i].item() * inputs.size(0)
-
-    def evaluate(self, load=False, model=None, debug=False):
-
-        # To load a model instead of using the trained one
-        if load:
-            self.model.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))
-
-        # Average distance on training and test set after unnormalizing
-        self.model.eval()
-        dic_err = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0)))  # initialized to zero
-        dic_err['val']['sigmas'] = [0.] * len(self.tasks)
-        dataset = KeypointsDataset(self.joints, phase='val')
-        size_eval = len(dataset)
-        start = 0
-        with torch.no_grad():
-            for end in range(self.VAL_BS, size_eval + self.VAL_BS, self.VAL_BS):
-                end = end if end < size_eval else size_eval
-                inputs, labels, _, _ = dataset[start:end]
-                start = end
-                inputs = inputs.to(self.device)
-                labels = labels.to(self.device)
-
-                # Debug plot for input-output distributions
-                if debug:
-                    debug_plots(inputs, labels)
-                    sys.exit()
-
-                # Forward pass
-                # outputs = self.model(inputs)
-                #self.compute_stats(outputs, labels, dic_err['val'], size_eval, clst='all')
-
-        # self.cout_stats(dic_err['val'], size_eval, clst='all')
-        # Evaluate performances on different clusters and save statistics
-
-        # Save the model and the results
-        if not (self.no_save or load):
-            torch.save(self.model.state_dict(), self.path_model)
-            print('-' * 120)
-            self.logger.info("\nmodel saved: {} \n".format(self.path_model))
-        else:
-            self.logger.info("\nmodel not saved\n")
-
-        return dic_err, self.model
-
-    def compute_stats(self, outputs, labels, dic_err, size_eval, clst):
-        """Compute mean, bi and max of torch tensors"""
-
-        _, loss_values = self.mt_loss(outputs, labels, phase='val')
-        rel_frac = outputs.size(0) / size_eval
-
-        tasks = self.tasks  # Exclude auxiliary
-
-        for idx, task in enumerate(tasks):
-            dic_err[clst][task] += float(loss_values[idx].item()) * (outputs.size(0) / size_eval)
-
-        # Distance
-        errs = torch.abs(extract_outputs(outputs)['d'] - extract_labels(labels)['d'])
-        assert rel_frac > 0.99, "Variance of errors not supported with partial evaluation"
-
-        # Uncertainty
-        bis = extract_outputs(outputs)['bi'].cpu()
-        bi = float(torch.mean(bis).item())
-        bi_perc = float(torch.sum(errs <= bis)) / errs.shape[0]
-        dic_err[clst]['bi'] += bi * rel_frac
-        dic_err[clst]['bi%'] += bi_perc * rel_frac
-        dic_err[clst]['std'] = errs.std()
-
-        # (Don't) Save auxiliary task results
-        dic_err['sigmas'].append(0)
-
-        if self.auto_tune_mtl:
-            assert len(loss_values) == 2 * len(self.tasks)
-            for i, _ in enumerate(self.tasks):
-                dic_err['sigmas'][i] += float(loss_values[len(tasks) + i + 1].item()) * rel_frac
-
-    def cout_stats(self, dic_err, size_eval, clst):
-        if clst == 'all':
-            print('-' * 120)
-            self.logger.info("Evaluation, val set: \nAv. dist D: {:.2f} m with bi {:.2f} ({:.1f}%), \n"
-                             "X: {:.1f} cm, Y: {:.1f} cm \nOri: {:.1f} "
-                             "\n H: {:.1f} cm, W: {:.1f} cm, L: {:.1f} cm"
-                             "\nAuxiliary Task: {:.1f} %, "
-                             .format(dic_err[clst]['d'], dic_err[clst]['bi'], dic_err[clst]['bi%'] * 100,
-                                     dic_err[clst]['x'] * 100, dic_err[clst]['y'] * 100,
-                                     dic_err[clst]['ori'], dic_err[clst]['h'] * 100, dic_err[clst]['w'] * 100,
-                                     dic_err[clst]['l'] * 100, dic_err[clst]['aux'] * 100))
-        if self.auto_tune_mtl:
-            self.logger.info("Sigmas: Z: {:.2f}, X: {:.2f}, Y:{:.2f}, H: {:.2f}, W: {:.2f}, L: {:.2f}, ORI: {:.2f}"
-                             " AUX:{:.2f}\n"
-                             .format(*dic_err['sigmas']))
-        else:
-            self.logger.info("Val err clust {} --> D:{:.2f}m, bi:{:.2f} ({:.1f}%), STD:{:.1f}m X:{:.1f} Y:{:.1f} "
-                             "Ori:{:.1f}d, H: {:.0f} W: {:.0f} L:{:.0f} for {} pp. "
" - .format(clst, dic_err[clst]['d'], dic_err[clst]['bi'], dic_err[clst]['bi%'] * 100, - dic_err[clst]['std'], dic_err[clst]['x'] * 100, dic_err[clst]['y'] * 100, - dic_err[clst]['ori'], dic_err[clst]['h'] * 100, dic_err[clst]['w'] * 100, - dic_err[clst]['l'] * 100, size_eval)) - - def cout_values(self, epoch, epoch_losses, running_loss): - - string = '\r' + '{:.0f} ' - format_list = [epoch] - for phase in running_loss: - string = string + phase[0:1].upper() + ':' - for el in running_loss['train']: - loss = running_loss[phase][el] / self.dataset_sizes[phase] - epoch_losses[phase][el].append(loss) - if el == 'all': - string = string + ':{:.1f} ' - format_list.append(loss) - elif el in ('ori', 'aux'): - string = string + el + ':{:.1f} ' - format_list.append(loss) - else: - string = string + el + ':{:.0f} ' - format_list.append(loss * 100) - - if epoch % 10 == 0: - print(string.format(*format_list)) - - def _print_losses(self, epoch_losses): - if not self.print_loss: - return - os.makedirs(self.dir_figures, exist_ok=True) - for idx, phase in enumerate(epoch_losses): - for idx_2, el in enumerate(epoch_losses['train']): - plt.figure(idx + idx_2) - plt.title(phase + '_' + el) - plt.xlabel('epochs') - plt.plot(epoch_losses[phase][el][10:], label='{} Loss: {}'.format(phase, el)) - plt.savefig(os.path.join(self.dir_figures, '{}_loss_{}.png'.format(phase, el))) - plt.close() - - def _set_logger(self, args): - if self.no_save: - logging.basicConfig(level=logging.INFO) - self.logger = logging.getLogger(__name__) - else: - self.path_model = self.path_out - print(self.path_model) - self.logger = set_logger(os.path.splitext(self.path_out)[0]) # remove .pkl - self.logger.info( # pylint: disable=logging-fstring-interpolation - f'\nVERSION: {__version__}\n' - f'\nINPUT_FILE: {args.joints}' - f'\nInput file version: {self.dataset_version}' - f'\nTorch version: {torch.__version__}\n' - f'\nTraining arguments:' - f'\nmode: {self.mode} \nlearning rate: {args.lr} \nbatch_size: {args.bs}' - f'\nepochs: {args.epochs} \ndropout: {args.dropout} ' - f'\nscheduler step: {args.sched_step} \nscheduler gamma: {args.sched_gamma} ' - f'\ninput_size: {self.input_size} \noutput_size: {self.output_size} ' - f'\nhidden_size: {args.hidden_size}' - f' \nn_stages: {args.n_stage} \n r_seed: {args.r_seed} \nlambdas: {self.lambdas}' - ) - - -def debug_plots(inputs, labels): - inputs_shoulder = inputs.cpu().numpy()[:, 5] - inputs_hip = inputs.cpu().numpy()[:, 11] - labels = labels.cpu().numpy() - heights = inputs_hip - inputs_shoulder - plt.figure(1) - plt.hist(heights, bins='auto') - plt.show() - plt.figure(2) - plt.hist(labels, bins='auto') - plt.show() - - -def get_accuracy(outputs, labels): - """From Binary cross entropy outputs to accuracy""" - - mask = outputs >= 0.5 - accuracy = 1. - torch.mean(torch.abs(mask.float() - labels)).item() - return accuracy