
Evaluation

This module provides evaluation methods for classification, regression and clustering. Available metrics include:

  1. AUC: Compute AUC for binary classification.
  2. KS: Compute the Kolmogorov-Smirnov statistic for binary classification.
  3. LIFT: Compute lift for binary classification.
  4. PRECISION: Compute precision for binary and multiclass classification.
  5. RECALL: Compute recall for binary and multiclass classification.
  6. ACCURACY: Compute accuracy for binary and multiclass classification.
  7. EXPLAINED_VARIANCE: Compute explained variance for regression tasks.
  8. MEAN_ABSOLUTE_ERROR: Compute mean absolute error for regression tasks.
  9. MEAN_SQUARED_ERROR: Compute mean squared error for regression tasks.
  10. MEAN_SQUARED_LOG_ERROR: Compute mean squared logarithmic error for regression tasks.
  11. MEDIAN_ABSOLUTE_ERROR: Compute median absolute error for regression tasks.
  12. R2_SCORE: Compute the R^2 (coefficient of determination) score for regression tasks.
  13. ROOT_MEAN_SQUARED_ERROR: Compute root mean squared error for regression tasks.
  14. JACCARD_SIMILARITY_SCORE: Compute the Jaccard similarity score for clustering tasks (labels are needed).
  15. ADJUSTED_RAND_SCORE: Compute the adjusted Rand score for clustering tasks (labels are needed).
  16. FOWLKES_MALLOWS_SCORE: Compute the Fowlkes-Mallows score for clustering tasks (labels are needed).
  17. DAVIES_BOULDIN_INDEX: Compute the Davies-Bouldin index for clustering tasks.
  18. DISTANCE_MEASURE: Compute cluster information in clustering algorithms.
  19. CONTINGENCY_MATRIX: Compute the contingency matrix for clustering tasks (labels are needed).
  20. PSI: Compute the Population Stability Index (see the sketch after this list).
  21. F1-Score: Compute the F1-score for binary tasks.
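Most of these metrics follow their standard textbook definitions. PSI is worth a concrete illustration: it measures how far a score distribution has drifted from a baseline. Below is a minimal, self-contained sketch of the PSI formula; the `psi` helper and the percentile-based binning scheme are illustrative assumptions, not FATE's implementation.

import numpy as np

def psi(expected, actual, n_bins=10):
    """Population Stability Index between a baseline and a new score sample.

    PSI = sum((actual% - expected%) * ln(actual% / expected%)) over bins.
    Bin edges come from the baseline distribution; a small epsilon guards
    against empty bins.
    """
    eps = 1e-6
    edges = np.percentile(expected, np.linspace(0, 100, n_bins + 1))
    edges[0], edges[-1] = -np.inf, np.inf  # catch out-of-range scores
    e_frac = np.histogram(expected, bins=edges)[0] / len(expected) + eps
    a_frac = np.histogram(actual, bins=edges)[0] / len(actual) + eps
    return float(np.sum((a_frac - e_frac) * np.log(a_frac / e_frac)))

rng = np.random.default_rng(0)
baseline = rng.normal(0.0, 1.0, 10_000)
shifted = rng.normal(0.2, 1.0, 10_000)
print(psi(baseline, shifted))  # small mean shift -> modest PSI

A common rule of thumb reads PSI below 0.1 as negligible drift and PSI above 0.25 as significant drift.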

Param

evaluation_param

Classes

EvaluateParam (BaseParam)

Define the evaluation method for binary/multiclass classification, regression and clustering

Parameters

eval_type : {'binary', 'regression', 'multi', 'clustering'}, default 'binary'. 'binary' is supported for HomoLR, HeteroLR and Secureboosting; 'regression' is supported for Secureboosting; 'multi' is not supported in this version.

unfold_multi_result : bool, default False. Unfold the multiclass result into several one-vs-rest binary classification results.

pos_label : int, float or str, default 1. The positive label; it should match the label type in the data. Effective only when eval_type is 'binary'.

metrics : list of str, default None. Metrics to compute; when empty, defaults to all metrics allowed for the given eval_type.

run_clustering_arbiter_metric : bool, default False. Whether to run clustering metrics on the arbiter.

need_run : bool, default True. Indicates whether this module needs to be run.
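For orientation, a minimal instantiation sketch. The constructor signature comes from the source below; the metric name strings ('auc', 'ks', 'recall') are assumed to match the lower-cased names in the list above, with the canonical constants defined in federatedml's consts module.

from federatedml.param.evaluation_param import EvaluateParam

# Binary evaluation with an explicit metric list.
param = EvaluateParam(
    eval_type="binary",
    pos_label=1,
    metrics=["auc", "ks", "recall"],  # check() pairs recall with precision automatically
    need_run=True,
)
param.check()         # validates eval_type, resolves aliases, fills defaults
print(param.metrics)  # expected: ['auc', 'ks', 'recall', 'precision'] (assumed constant values)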

Source code in federatedml/param/evaluation_param.py
class EvaluateParam(BaseParam):
    """
    Define the evaluation method for binary/multiclass classification, regression and clustering

    Parameters
    ----------
    eval_type : {'binary', 'regression', 'multi', 'clustering'}, default 'binary'
        'binary' is supported for HomoLR, HeteroLR and Secureboosting;
        'regression' is supported for Secureboosting;
        'multi' is not supported in this version

    unfold_multi_result : bool, default False
        unfold the multiclass result into several one-vs-rest binary classification results

    pos_label : int, float or str, default 1
        the positive label; should match the label type in the data.
        effective only when eval_type is 'binary'

    metrics : list of str, default None
        metrics to compute; when empty, defaults to all metrics allowed for the given eval_type

    run_clustering_arbiter_metric : bool, default False
        whether to run clustering metrics on the arbiter

    need_run : bool, default True
        indicate if this module needs to be run
    """

    def __init__(self, eval_type="binary", pos_label=1, need_run=True, metrics=None,
                 run_clustering_arbiter_metric=False, unfold_multi_result=False):
        super().__init__()
        self.eval_type = eval_type
        self.pos_label = pos_label
        self.need_run = need_run
        self.metrics = metrics
        self.unfold_multi_result = unfold_multi_result
        self.run_clustering_arbiter_metric = run_clustering_arbiter_metric

        self.default_metrics = {
            consts.BINARY: consts.ALL_BINARY_METRICS,
            consts.MULTY: consts.ALL_MULTI_METRICS,
            consts.REGRESSION: consts.ALL_REGRESSION_METRICS,
            consts.CLUSTERING: consts.ALL_CLUSTER_METRICS
        }

        self.allowed_metrics = {
            consts.BINARY: consts.ALL_BINARY_METRICS,
            consts.MULTY: consts.ALL_MULTI_METRICS,
            consts.REGRESSION: consts.ALL_REGRESSION_METRICS,
            consts.CLUSTERING: consts.ALL_CLUSTER_METRICS
        }

    def _use_single_value_default_metrics(self):

        self.default_metrics = {
            consts.BINARY: consts.DEFAULT_BINARY_METRIC,
            consts.MULTY: consts.DEFAULT_MULTI_METRIC,
            consts.REGRESSION: consts.DEFAULT_REGRESSION_METRIC,
            consts.CLUSTERING: consts.DEFAULT_CLUSTER_METRIC
        }

    def _check_valid_metric(self, metrics_list):

        metric_list = consts.ALL_METRIC_NAME
        alias_name: dict = consts.ALIAS

        full_name_list = []

        metrics_list = [str.lower(i) for i in metrics_list]

        for metric in metrics_list:

            if metric in metric_list:
                if metric not in full_name_list:
                    full_name_list.append(metric)
                continue

            valid_flag = False
            for alias, full_name in alias_name.items():
                if metric in alias:
                    if full_name not in full_name_list:
                        full_name_list.append(full_name)
                    valid_flag = True
                    break

            if not valid_flag:
                raise ValueError('metric {} is not supported'.format(metric))

        allowed_metrics = self.allowed_metrics[self.eval_type]

        for m in full_name_list:
            if m not in allowed_metrics:
                raise ValueError('metric {} is not used for {} task'.format(m, self.eval_type))

        if consts.RECALL in full_name_list and consts.PRECISION not in full_name_list:
            full_name_list.append(consts.PRECISION)

        if consts.RECALL not in full_name_list and consts.PRECISION in full_name_list:
            full_name_list.append(consts.RECALL)

        return full_name_list

    def check(self):

        descr = "evaluate param's "
        self.eval_type = self.check_and_change_lower(self.eval_type,
                                                     [consts.BINARY, consts.MULTY, consts.REGRESSION,
                                                      consts.CLUSTERING],
                                                     descr)

        if type(self.pos_label).__name__ not in ["str", "float", "int"]:
            raise ValueError(
                "evaluate param's pos_label {} not supported, should be str or float or int type".format(
                    self.pos_label))

        if type(self.need_run).__name__ != "bool":
            raise ValueError(
                "evaluate param's need_run {} not supported, should be bool".format(
                    self.need_run))

        if self.metrics is None or len(self.metrics) == 0:
            self.metrics = self.default_metrics[self.eval_type]
            LOGGER.warning('use default metric {} for eval type {}'.format(self.metrics, self.eval_type))

        self.check_boolean(self.unfold_multi_result, 'unfold_multi_result')

        self.metrics = self._check_valid_metric(self.metrics)

        LOGGER.info("Finish evaluation parameter check!")

        return True

    def check_single_value_default_metric(self):
        self._use_single_value_default_metrics()

        # in the validation strategy, psi, f1-score, confusion-mat and pr-quantile are not supported in the current version
        if self.metrics is None or len(self.metrics) == 0:
            self.metrics = self.default_metrics[self.eval_type]
            LOGGER.warning('use default metric {} for eval type {}'.format(self.metrics, self.eval_type))

        ban_metric = [consts.PSI, consts.F1_SCORE, consts.CONFUSION_MAT, consts.QUANTILE_PR]
        # filter with a comprehension: removing items while iterating would skip elements
        self.metrics = [metric for metric in self.metrics if metric not in ban_metric]
        self.check()
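To make unfold_multi_result concrete: unfolding turns a multiclass result into one binary problem per class, each of which can then be scored with the binary metrics above (AUC, KS, and so on). The sketch below illustrates the general one-vs-rest idea on toy arrays; it is not FATE's internal unfolding code, and all data in it is hypothetical.

import numpy as np

# Hypothetical multiclass output: per-class probabilities and true labels.
classes = [0, 1, 2]
proba = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1],
                  [0.2, 0.3, 0.5]])
y_true = np.array([0, 1, 2])

# One-vs-rest unfolding: each class yields a binary (score, label) problem,
# which any binary metric can then consume.
for idx, cls in enumerate(classes):
    binary_labels = (y_true == cls).astype(int)
    binary_scores = proba[:, idx]
    print(f"class {cls}: labels={binary_labels.tolist()}, scores={binary_scores.tolist()}")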
Last update: 2021-11-12