This repository has been archived by the owner on Jun 2, 2020. It is now read-only.

Metrics computation is wrong? #33

Open
elch10 opened this issue Aug 4, 2019 · 1 comment
Labels: wontfix This will not be worked on

elch10 commented Aug 4, 2019

Describe the bug

I was puzzled when I saw strange results from my metrics. I have custom metric wrappers around sklearn metrics:

import numpy as np
import torch

from neural_pipeline.train_config import AbstractMetric, MetricsProcessor, MetricsGroup
from sklearn.metrics import precision_score, recall_score, accuracy_score

class Metric(AbstractMetric):
  def __init__(self, name, function):
    super().__init__(name)
    self.function = function

  def calc(self, output: torch.Tensor, target: torch.Tensor) -> np.ndarray or float:
    predicted = output.gt(0.5)
    return self.function(target, predicted)

class Metrics(MetricsProcessor):
  def __init__(self, stage_name: str):
    super().__init__()
    accuracy = Metric('accuracy', accuracy_score)
    precision = Metric('precision', precision_score)
    recall = Metric('recall', recall_score)
    self.add_metrics_group(MetricsGroup(stage_name)
                           .add(accuracy)
                           .add(precision)
                           .add(recall))

The configuration is as follows:

from torch import nn, optim

train_batch_size = 32
val_batch_size = len(X_test)

train_dataset = DataProducer([Dataset(X_train, y_train)], batch_size=train_batch_size)
validation_dataset = DataProducer([Dataset(X_test, y_test)], batch_size=val_batch_size)

train_stages = [TrainStage(train_dataset, Metrics('train')), 
                ValidationStage(validation_dataset, Metrics('validation'))]
loss = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
train_config = TrainConfig(train_stages, loss, optimizer)

fsm = FileStructManager(base_dir='data', is_continue=False)

epochs = 1
trainer = Trainer(model, train_config, fsm, device).set_epoch_num(epochs)
trainer.monitor_hub.add_monitor(TensorboardMonitor(fsm, is_continue=False))\
                   .add_monitor(LogMonitor(fsm))

trainer.train()

After training, I loaded the metrics from data/monitors/metrics_log/metrics_log.json and got the following results for the validation data:

[image: validation metrics from metrics_log.json]

But I computed the same metrics manually and got a different result:
[image: manually computed metrics]
I understand that if I used a batch size that is not equal to the length of the validation dataset, I would get a different result. But that is not the case here. Another problem I found is that the result of the last training step also differs from the one computed manually, no matter what value is used for epochs. I can't find an error in the code, but it looks like magic to me.
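
For context, here is a minimal, self-contained sketch of the pitfall described above (the arrays are made up for illustration, not taken from the thread): averaging a per-batch metric such as precision generally gives a different number than computing it once over the whole validation set.

import numpy as np
from sklearn.metrics import precision_score

y_true = np.array([1, 0, 0, 1, 1, 0, 1, 0])
y_pred = np.array([1, 1, 0, 0, 1, 0, 1, 1])

# Precision over the whole set at once
whole = precision_score(y_true, y_pred)  # 3 TP / (3 TP + 2 FP) = 0.6

# Precision computed per batch of 4 samples, then averaged
per_batch = [precision_score(y_true[i:i + 4], y_pred[i:i + 4]) for i in (0, 4)]
averaged = np.mean(per_batch)  # (0.5 + 0.667) / 2 ≈ 0.583

print(whole, averaged)  # the two values differ in general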


toodef commented Aug 4, 2019

@elch10 There is an architecture bug that will be fixed in an upcoming release.
As a hotfix, you should override the get_values method.

As an example, I do something like this:

from abc import abstractmethod

import numpy as np
from torch import Tensor
from sklearn.metrics import roc_auc_score, confusion_matrix

from neural_pipeline.train_config import AbstractMetric, MetricsProcessor, MetricsGroup


class _ClassificationMetric(AbstractMetric):
    def __init__(self, name: str):
        super().__init__(name)

        self._targets, self._preds = [], []
        self._pred_preprocess = lambda x: x.data.cpu().numpy()
        self._target_preprocess = lambda x: x.data.cpu().numpy()

    def set_pred_preproc(self, preproc: callable) -> '_ClassificationMetric':
        self._pred_preprocess = preproc
        return self

    def set_target_preproc(self, preproc: callable) -> '_ClassificationMetric':
        self._target_preprocess = preproc
        return self

    @staticmethod
    def multiclass_pred_preproc(val: Tensor):
        """
        Multiclass predict preprocess method.

        The method chooses the index of the max element. If index == 0, it returns 1 - val[index], otherwise val[index].

        For example:
        ```
            pred = torch.Tensor([[0.1, 0.5, 0.3],
                                 [0.9, 0.89, 0.1],
                                 [0.9, 0.3, 0.99],
                                 [0.1, 0.4, 0.3]])

            res = ROCAUCMetric.multiclass_pred_preproc(pred)
            res: [0.5, 0.1, 0.99, 0.4]
        ```

        Args:
              val (Tensor): values to preprocess as Tensor of size [B, C]

        Returns:
            np.ndarray of shape [B]
        """
        val_internal = val.data.cpu().numpy()
        idx = np.argmax(val_internal, axis=1)
        max_vals = val_internal[np.arange(len(val_internal)), idx]
        return np.squeeze(np.where(idx > 0, max_vals, 1 - max_vals))

    @staticmethod
    def multiclass_target_preproc(val: Tensor):
        """
        Multiclass target preprocess method.

        Args:
              val (Tensor): values to target as Tensor of size [B, 1]

        Returns:
            np.ndarray of shape [B]
        """
        val_internal = val.data.cpu().numpy()
        return np.squeeze(np.clip(val_internal, 0, 1).astype(np.int))

    def calc(self, predict: Tensor, target: Tensor) -> np.ndarray or float:
        """
        Calc metric

        Args:
            predict (Tensor): predict classes as Tensor of size [B, C]
            target (Tensor): ground truth classes as Tensor of size [B, C]
        Returns:
             zero, because the metric accumulates all values and computes the result in :meth:`get_values`
        """
        pred = self._pred_preprocess(predict)
        tar = self._target_preprocess(target)

        self._preds.extend(pred)
        self._targets.extend(tar)
        return 0

    def _calc(self, output: Tensor, target: Tensor):
        self.calc(output, target)

    @abstractmethod
    def _get_values(self):
        pass

    def get_values(self):
        """
        Get values of metric
        :return:
        """
        res = self._get_values()
        self._targets, self._preds = [], []
        return np.array([res]) if type(res) is float else res


class ROCAUCMetric(_ClassificationMetric):
    def __init__(self, threshold: float, name: str = 'ROC_AUC'):
        super().__init__(name)
        self._thresh = threshold

    def _get_values(self):
        preds = np.where(np.squeeze(self._preds) < self._thresh, 0, 1)

        try:
            res = roc_auc_score(np.squeeze(self._targets), preds)
        except ValueError:
            return np.nan

        return res


class RecallMetric(_ClassificationMetric):
    def __init__(self, threshold: float, name: str = "Recall"):
        super().__init__(name)
        self._thresh = threshold

    def _get_values(self):
        preds = np.where(np.squeeze(self._preds) < self._thresh, 0, 1)
        tn, fp, fn, tp = confusion_matrix(np.squeeze(self._targets), preds).ravel()
        return tp / (tp + fn)


class ActCMetric(_ClassificationMetric):
    def __init__(self, threshold: float, name: str = "ActC"):
        super().__init__(name)
        self._thresh = threshold

    def _get_values(self):
        preds = np.where(np.squeeze(self._preds) < self._thresh, 0, 1)
        tn, fp, fn, tp = confusion_matrix(np.squeeze(self._targets), preds).ravel()
        return fp / (fp + tn) + 19 * fn / (fn + tp)


class ClassificationMetricsProcessor(MetricsProcessor):
    def __init__(self, name: str, thresholds: [float]):
        super().__init__()

        self._auc_metrics = []
        auc_group = MetricsGroup('ROC_AUC')
        if thresholds is None:
            self._auc_metrics.append(ROCAUCMetric(0.5, name))
            auc_group.add(self._auc_metrics[-1])
        else:
            for thresh in thresholds:
                self._auc_metrics.append(ROCAUCMetric(thresh, '{}_{}'.format(name, thresh)))
                auc_group.add(self._auc_metrics[-1])

        self.add_metrics_group(auc_group)

    def set_pred_preproc(self, preproc: callable) -> 'ClassificationMetricsProcessor':
        for m in self._auc_metrics:
            m.set_pred_preproc(preproc)
        return self

    def set_target_preproc(self, preproc: callable) -> 'ClassificationMetricsProcessor':
        for m in self._auc_metrics:
            m.set_target_preproc(preproc)
        return self

The point is that the calc method is called for every batch, while get_values is called after each epoch. So you need to collect the values manually and compute the result in get_values.
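
For completeness, here is a hypothetical metric written in the same accumulate-then-compute style (PrecisionMetric is my own illustrative name, not part of the library): the base class collects predictions and targets in calc, and sklearn is called only once per epoch in _get_values, so the score is computed over the whole validation set rather than averaged over batches.

from sklearn.metrics import precision_score

class PrecisionMetric(_ClassificationMetric):
    def __init__(self, threshold: float = 0.5, name: str = "Precision"):
        super().__init__(name)
        self._thresh = threshold

    def _get_values(self):
        # Binarize all predictions accumulated over the epoch and compute
        # precision over the full set of collected samples, not per batch.
        preds = np.where(np.squeeze(self._preds) < self._thresh, 0, 1)
        return precision_score(np.squeeze(self._targets), preds)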

@toodef toodef added the wontfix This will not be worked on label Aug 4, 2019