tensorflow_model_analysis/metrics/metric_specs.py

# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Specifications for common metrics."""

import collections
import importlib
import json
import re
from typing import Any, Dict, FrozenSet, Iterable, Iterator, List, NamedTuple, Optional, Tuple, Type, Union

import tensorflow as tf
from tensorflow_model_analysis.metrics import aggregation
from tensorflow_model_analysis.metrics import binary_confusion_matrices
from tensorflow_model_analysis.metrics import calibration
from tensorflow_model_analysis.metrics import calibration_plot
from tensorflow_model_analysis.metrics import confusion_matrix_metrics
from tensorflow_model_analysis.metrics import confusion_matrix_plot
from tensorflow_model_analysis.metrics import example_count
from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.metrics import metric_util
from tensorflow_model_analysis.metrics import multi_class_confusion_matrix_plot
from tensorflow_model_analysis.metrics import tf_metric_wrapper
from tensorflow_model_analysis.metrics import weighted_example_count
from tensorflow_model_analysis.proto import config_pb2
from tensorflow_model_analysis.slicer import slicer_lib as slicer
from tensorflow_model_analysis.utils import model_util
from tensorflow_model_analysis.utils.keras_lib import tf_keras

from tensorflow_metadata.proto.v0 import schema_pb2


# Module that the keras Loss base class is defined in. MetricConfig.module is
# compared against this value to tell serialized losses apart from metrics.
_TF_LOSSES_MODULE = tf_keras.losses.Loss().__class__.__module__

# Any metric-like object accepted by this module: a keras metric, a keras loss
# or a TFMA metric.
_TFOrTFMAMetricOrLoss = Union[
    tf_keras.metrics.Metric, tf_keras.losses.Loss, metric_types.Metric
]
# Keras-only subset of the above.
_TFMetricOrLoss = Union[tf_keras.metrics.Metric, tf_keras.losses.Loss]

# List of metrics or losses optionally keyed by output name.
_MetricsOrLosses = Union[List[_TFOrTFMAMetricOrLoss],
                         Dict[str, List[_TFOrTFMAMetricOrLoss]]]

# TF config settings that TFMA only supports default values for because the
# parameters are not supported by the TFMA implementation of the metric. The
# settings are keyed by class name -> arg_name -> [allowed defaults].
_UNSUPPORTED_TF_SETTINGS = {
    '': {  # All classes
        # TFMA only implements float based versions of TF metrics.
        'dtype': [None, 'float32', tf.float32]
    },
    'AUC': {
        'multi_label': [False],
        'num_labels': [None],
        'label_weights': [None],
        'from_logits': [False],
    },
    'MeanAbsoluteError': {'reduction': ['auto']},
    'MeanSquaredError': {'reduction': ['auto']},
    'MeanAbsolutePercentageError': {'reduction': ['auto']},
    # Key must match the class name exactly; the previous spelling
    # ('MeanSqaured...') could never match any class.
    'MeanSquaredLogarithmicError': {'reduction': ['auto']},
    # NOTE(review): keras spells these classes `BinaryCrossentropy` /
    # `CategoricalCrossentropy` (lower-case "e", as used elsewhere in this
    # file) -- confirm which spelling the lookup of these keys expects.
    'BinaryCrossEntropy': {'reduction': ['auto']},
    'CategoricalCrossEntropy': {'reduction': ['auto']},
}


def config_from_metric(
    metric: _TFOrTFMAMetricOrLoss) -> config_pb2.MetricConfig:
  """Serializes a metric (or loss) instance into a MetricConfig.

  Args:
    metric: A keras metric, a keras loss or a TFMA metric instance.

  Returns:
    The MetricConfig describing the given instance.

  Raises:
    NotImplementedError: If the instance is none of the supported types.
  """
  is_tf_metric = isinstance(metric, tf_keras.metrics.Metric)
  if is_tf_metric or isinstance(metric, tf_keras.losses.Loss):
    # Keras metrics and losses (e.g. MeanAbsoluteError) that TFMA supports
    # natively are serialized as metrics with the unsupported TF settings
    # stripped.
    if _is_supported_tf_metric(metric):
      return _remove_unsupported_tf_settings(_serialize_tf_metric(metric))
    if is_tf_metric:
      return _serialize_tf_metric(metric)
    return _serialize_tf_loss(metric)

  if isinstance(metric, metric_types.Metric):
    return _serialize_tfma_metric(metric)

  raise NotImplementedError('unknown metric type {}: metric={}'.format(
      type(metric), metric))


def _example_weighted_default(eval_config: Optional[config_pb2.EvalConfig],
                              spec: config_pb2.MetricsSpec) -> bool:
  """Returns true if example weighted is the default for the given spec.

  If any of the models and/or outputs have example weights then example weighted
  will be true by default. When no eval config is given there are no model
  specs to consult, so the default is unweighted.

  Args:
    eval_config: Eval config, or None if no config is available.
    spec: Metrics spec to get default for.
  """
  if eval_config is None:
    # to_computations accepts eval_config=None and forwards it here; without
    # this guard the lookups below would fail on None.
    return False

  # Default to using example weights if an example weight key is set.
  def has_example_weight_key(model_spec: config_pb2.ModelSpec) -> bool:
    if model_spec.example_weight_key:
      return True
    if spec.output_names and model_spec.example_weight_keys:
      return any(output_name in model_spec.example_weight_keys
                 for output_name in spec.output_names)
    return False

  if spec.model_names:
    # Only the models named by the spec are considered.
    for model_name in spec.model_names:
      model_spec = model_util.get_model_spec(eval_config, model_name)
      if model_spec and has_example_weight_key(model_spec):
        return True
    return False

  # No model names on the spec: consider every model in the eval config.
  return any(
      has_example_weight_key(model_spec)
      for model_spec in eval_config.model_specs)


def _example_weight_options(eval_config: config_pb2.EvalConfig,
                            spec: config_pb2.MetricsSpec) -> List[bool]:
  """Returns the example-weighted flags to compute for the given spec.

  Args:
    eval_config: Eval config, used to derive the default when the spec does not
      set example_weights.
    spec: Metrics spec to get the options for.

  Returns:
    A list holding True when weighted metrics are requested and/or False when
    unweighted metrics are requested (in that order). If the spec has no
    example_weights setting, the list holds only the default for the spec.
  """
  if not spec.HasField('example_weights'):
    return [_example_weighted_default(eval_config, spec)]

  weight_settings = spec.example_weights
  options = []
  if weight_settings.weighted:
    options.append(True)
  if weight_settings.unweighted:
    options.append(False)
  return options


def specs_from_metrics(
    metrics: Optional[_MetricsOrLosses] = None,
    unweighted_metrics: Optional[_MetricsOrLosses] = None,
    model_names: Optional[List[str]] = None,
    output_names: Optional[List[str]] = None,
    output_weights: Optional[Dict[str, float]] = None,
    binarize: Optional[config_pb2.BinarizationOptions] = None,
    aggregate: Optional[config_pb2.AggregationOptions] = None,
    query_key: Optional[str] = None,
    include_example_count: Optional[bool] = None,
    include_weighted_example_count: Optional[bool] = None
) -> List[config_pb2.MetricsSpec]:
  """Returns specs for tf_keras.metrics/losses or tfma.metrics classes.

  Examples:

    metrics_specs = specs_from_metrics(
      [
          tf_keras.metrics.BinaryAccuracy(),
          tfma.metrics.AUC(),
          tfma.metrics.MeanLabel(),
          tfma.metrics.MeanPrediction()
          ...
      ],
      unweighted_metrics=[
          tfma.metrics.Precision(),
          tfma.metrics.Recall()
      ])

    metrics_specs = specs_from_metrics({
      'output1': [
          tf_keras.metrics.BinaryAccuracy(),
          tfma.metrics.AUC(),
          tfma.metrics.MeanLabel(),
          tfma.metrics.MeanPrediction()
          ...
      ],
      'output2': [
          tfma.metrics.Precision(),
          tfma.metrics.Recall(),
      ]
    })

  Args:
    metrics: List of tfma.metrics.Metric, tf_keras.metrics.Metric, or
      tf_keras.losses.Loss. For multi-output models a dict of lists may be
      passed where the dict is keyed by the output_name. Whether these
      metrics are weighted or not will be determined based on whether the
      ModelSpec associated with the metrics contains example weight key settings
      or not.
    unweighted_metrics: Same as metrics only these metrics will not be weighted
      by example_weight regardless of the example weight key settings.
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional output names (if multi-output models). If the metrics
      are a dict this should not be set.
    output_weights: Optional output weights for creating overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, its weight defaults to 0.0 (i.e. output ignored).
      Not applied when the metrics are passed as a dict keyed by output name.
    binarize: Optional settings for binarizing multi-class/multi-label metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    query_key: Optional query key for query/ranking based metrics.
    include_example_count: True to add example_count metric. Default is True.
      When the metrics are passed as a dict, the example count is only added
      once (with the specs of the first output).
    include_weighted_example_count: True to add weighted example_count metric.
      Default is True. A weighted example count will be added per output for
      multi-output models.

  Returns:
    MetricsSpecs based on options provided. A separate spec is returned for
    weighted vs unweighted metrics. A separate spec is also returned for each
    output if a dict of metrics per output is passed.

  Raises:
    ValueError: If metrics is a dict and output_names is also set, or if
      metrics and unweighted_metrics are both given but only one of them is a
      dict.
  """
  if isinstance(metrics, dict) and output_names:
    raise ValueError('metrics cannot be a dict when output_names is used: '
                     'metrics={}, output_names={}'.format(
                         metrics, output_names))
  if (metrics and unweighted_metrics and
      isinstance(metrics, dict) != isinstance(unweighted_metrics, dict)):
    raise ValueError(
        'metrics and unweighted_metrics must both be either dicts or lists: '
        f'metrics={metrics}, unweighted_metrics={unweighted_metrics}')

  if isinstance(metrics, dict) or isinstance(unweighted_metrics, dict):
    # Per-output metrics: build the specs for each output separately by
    # recursing with a single output name.
    metrics_dict = metrics if isinstance(metrics, dict) else {}
    unweighted_metrics_dict = (
        unweighted_metrics if isinstance(unweighted_metrics, dict) else {})
    specs = []
    output_names = set(metrics_dict) | set(unweighted_metrics_dict)
    # Sorted so that the order of the returned specs is deterministic.
    for output_name in sorted(output_names):
      # Note: output_weights is not forwarded to the per-output specs (each
      # one names a single output).
      specs.extend(
          specs_from_metrics(
              metrics_dict.get(output_name),
              unweighted_metrics=unweighted_metrics_dict.get(output_name),
              model_names=model_names,
              output_names=[output_name],
              binarize=binarize,
              aggregate=aggregate,
              # Forward the query key so that it is honored for dict inputs
              # the same way it is for list inputs.
              query_key=query_key,
              include_example_count=include_example_count,
              include_weighted_example_count=include_weighted_example_count))
      # The (unweighted) example count does not depend on the output, so it is
      # only added along with the first output.
      include_example_count = False
    return specs

  if include_example_count is None:
    include_example_count = True
  if include_weighted_example_count is None:
    include_weighted_example_count = True

  # Add the computations for the example counts and weights since they are
  # independent of the model and class ID.
  specs = example_count_specs(
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights,
      include_example_count=include_example_count,
      include_weighted_example_count=include_weighted_example_count)

  if metrics:
    # example_weights is left unset so that weighting follows the defaults
    # derived from the model spec settings.
    specs.append(
        config_pb2.MetricsSpec(
            metrics=[config_from_metric(metric) for metric in metrics],
            model_names=model_names,
            output_names=output_names,
            output_weights=output_weights,
            binarize=binarize,
            aggregate=aggregate,
            example_weights=None,
            query_key=query_key))
  if unweighted_metrics:
    specs.append(
        config_pb2.MetricsSpec(
            metrics=[
                config_from_metric(metric) for metric in unweighted_metrics
            ],
            model_names=model_names,
            output_names=output_names,
            output_weights=output_weights,
            binarize=binarize,
            aggregate=aggregate,
            example_weights=config_pb2.ExampleWeightOptions(unweighted=True),
            query_key=query_key))

  return specs


def example_count_specs(
    model_names: Optional[List[str]] = None,
    output_names: Optional[List[str]] = None,
    output_weights: Optional[Dict[str, float]] = None,
    include_example_count: bool = True,
    include_weighted_example_count: bool = True
) -> List[config_pb2.MetricsSpec]:
  """Builds the metric specs for the example count metrics.

  Args:
    model_names: Optional list of model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    output_weights: Optional output weights for creating overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, its weight defaults to 0.0 (i.e. output ignored).
    include_example_count: True to add example_count metric.
    include_weighted_example_count: True to add weighted_example_count metric. A
      weighted example count will be added per output for multi-output models.

  Returns:
    A list with the unweighted example count spec (if requested) followed by
    the weighted example count spec (if requested).
  """
  count_specs = []

  if include_example_count:
    # The plain example count is unweighted and is not tied to any output.
    unweighted_count = _serialize_tfma_metric(example_count.ExampleCount())
    unweighted_only = config_pb2.ExampleWeightOptions(unweighted=True)
    count_specs.append(
        config_pb2.MetricsSpec(
            metrics=[unweighted_count],
            model_names=model_names,
            example_weights=unweighted_only))

  if include_weighted_example_count:
    # TODO(b/143180976): Replace WeightedExampleCount with ExampleCount once the
    # UI is updated to distinguish weighted for unweighted metrics.
    weighted_count = _serialize_tfma_metric(
        weighted_example_count.WeightedExampleCount())
    weighted_only = config_pb2.ExampleWeightOptions(weighted=True)
    count_specs.append(
        config_pb2.MetricsSpec(
            metrics=[weighted_count],
            model_names=model_names,
            output_names=output_names,
            output_weights=output_weights,
            example_weights=weighted_only))

  return count_specs


def default_regression_specs(
    model_names: Optional[List[str]] = None,
    output_names: Optional[List[str]] = None,
    output_weights: Optional[Dict[str, float]] = None,
    loss_functions: Optional[
        List[Union[tf_keras.metrics.Metric, tf_keras.losses.Loss]]
    ] = None,
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
) -> List[config_pb2.MetricsSpec]:
  """Returns default metric specs for regression problems.

  Args:
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    output_weights: Optional output weights for creating overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, its weight defaults to 0.0 (i.e. output ignored).
    loss_functions: Loss functions to use (if None MSE is used).
    min_value: Min value for calibration plot (if None no plot will be created).
    max_value: Max value for calibration plot (if None no plot will be created).
  """
  if loss_functions is None:
    loss_functions = [tf_keras.metrics.MeanSquaredError(name='mse')]

  regression_metrics = [
      tf_keras.metrics.Accuracy(name='accuracy'),
      calibration.MeanLabel(name='mean_label'),
      calibration.MeanPrediction(name='mean_prediction'),
      calibration.Calibration(name='calibration'),
  ]
  regression_metrics.extend(loss_functions)

  # The calibration plot needs both ends of the value range.
  has_plot_range = min_value is not None and max_value is not None
  if has_plot_range:
    plot = calibration_plot.CalibrationPlot(
        name='calibration_plot', left=min_value, right=max_value)
    regression_metrics.append(plot)

  return specs_from_metrics(
      regression_metrics,
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights)


def default_binary_classification_specs(
    model_names: Optional[List[str]] = None,
    output_names: Optional[List[str]] = None,
    output_weights: Optional[Dict[str, float]] = None,
    binarize: Optional[config_pb2.BinarizationOptions] = None,
    aggregate: Optional[config_pb2.AggregationOptions] = None,
    include_loss: bool = True) -> List[config_pb2.MetricsSpec]:
  """Builds the default metric specs for binary classification problems.

  Args:
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    output_weights: Optional output weights for creating overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, its weight defaults to 0.0 (i.e. output ignored).
    binarize: Optional settings for binarizing multi-class/multi-label metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    include_loss: True to include loss.

  Returns:
    Specs produced by specs_from_metrics for the default binary classification
    metrics and plots.
  """
  num_thresholds = binary_confusion_matrices.DEFAULT_NUM_THRESHOLDS

  binary_metrics = [
      confusion_matrix_metrics.BinaryAccuracy(name='binary_accuracy'),
      confusion_matrix_metrics.AUC(name='auc', num_thresholds=num_thresholds),
      # The misspelled name matches the default name used by estimator.
      confusion_matrix_metrics.AUC(
          name='auc_precison_recall',
          curve='PR',
          num_thresholds=num_thresholds),
      confusion_matrix_metrics.Precision(name='precision'),
      confusion_matrix_metrics.Recall(name='recall'),
      calibration.MeanLabel(name='mean_label'),
      calibration.MeanPrediction(name='mean_prediction'),
      calibration.Calibration(name='calibration'),
      confusion_matrix_plot.ConfusionMatrixPlot(name='confusion_matrix_plot'),
      calibration_plot.CalibrationPlot(name='calibration_plot'),
  ]
  if include_loss:
    loss = tf_keras.metrics.BinaryCrossentropy(name='loss')
    binary_metrics.append(loss)

  return specs_from_metrics(
      binary_metrics,
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights,
      binarize=binarize,
      aggregate=aggregate)


def default_multi_class_classification_specs(
    model_names: Optional[List[str]] = None,
    output_names: Optional[List[str]] = None,
    output_weights: Optional[Dict[str, float]] = None,
    binarize: Optional[config_pb2.BinarizationOptions] = None,
    aggregate: Optional[config_pb2.AggregationOptions] = None,
    sparse: bool = True) -> List[config_pb2.MetricsSpec]:
  """Builds the default metric specs for multi-class classification problems.

  Args:
    model_names: Optional model names if multi-model evaluation.
    output_names: Optional list of output names (if multi-output model).
    output_weights: Optional output weights for creating overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, its weight defaults to 0.0 (i.e. output ignored).
    binarize: Optional settings for binarizing multi-class/multi-label metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    sparse: True if the labels are sparse.

  Returns:
    The specs for the multi-class metrics followed by the default binary
    classification specs (using the binarize settings without top_k_list and
    micro averaging when aggregate is not given).
  """
  if sparse:
    loss_cls = tf_keras.metrics.SparseCategoricalCrossentropy
    accuracy_cls = tf_keras.metrics.SparseCategoricalAccuracy
  else:
    loss_cls = tf_keras.metrics.CategoricalCrossentropy
    accuracy_cls = tf_keras.metrics.CategoricalAccuracy

  multi_class = [
      loss_cls(name='loss'),
      accuracy_cls(name='accuracy'),
      multi_class_confusion_matrix_plot.MultiClassConfusionMatrixPlot(),
  ]

  if binarize is not None:
    # The top_k settings are covered here by dedicated precision / recall
    # metrics, so they are removed from the options handed to the binary specs.
    for top_k in binarize.top_k_list.values:
      multi_class.append(
          confusion_matrix_metrics.Precision(name='precision', top_k=top_k))
      multi_class.append(
          confusion_matrix_metrics.Recall(name='recall', top_k=top_k))
    stripped_binarize = config_pb2.BinarizationOptions()
    stripped_binarize.CopyFrom(binarize)
    stripped_binarize.ClearField('top_k_list')
    binarize = stripped_binarize

  all_specs = specs_from_metrics(
      multi_class,
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights)

  if aggregate is None:
    aggregate = config_pb2.AggregationOptions(micro_average=True)
  binary_specs = default_binary_classification_specs(
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights,
      binarize=binarize,
      aggregate=aggregate)
  all_specs.extend(binary_specs)
  return all_specs


def metric_instance(
    metric_config: config_pb2.MetricConfig,
    tfma_metric_classes: Optional[Dict[str, Type[metric_types.Metric]]] = None
) -> metric_types.Metric:
  """Instantiates the metric described by the given config.

  Args:
    metric_config: Config of the metric to create.
    tfma_metric_classes: Optional TFMA metric classes keyed by class name. If
      None, the registered TFMA metrics are used.

  Returns:
    The metric instance created from the config.

  Raises:
    NotImplementedError: If the class loaded from the config's module is
      neither a keras metric, a keras loss nor a TFMA metric.
  """
  if tfma_metric_classes is None:
    tfma_metric_classes = metric_types.registered_metrics()

  class_name = metric_config.class_name
  if class_name in tfma_metric_classes:
    return _deserialize_tfma_metric(metric_config, tfma_metric_classes)

  if not metric_config.module:
    # No module given: deserialize as a TF metric without custom classes.
    return _deserialize_tf_metric(metric_config, {})  # pytype: disable=bad-return-type  # typed-keras

  # Custom class: load it from its module and dispatch on its base class.
  module = importlib.import_module(metric_config.module)
  cls = getattr(module, class_name)
  custom_classes = {class_name: cls}
  if issubclass(cls, tf_keras.metrics.Metric):
    return _deserialize_tf_metric(metric_config, custom_classes)  # pytype: disable=bad-return-type  # typed-keras
  if issubclass(cls, tf_keras.losses.Loss):
    return _deserialize_tf_loss(metric_config, custom_classes)  # pytype: disable=bad-return-type  # typed-keras
  if issubclass(cls, metric_types.Metric):
    return _deserialize_tfma_metric(metric_config, custom_classes)

  raise NotImplementedError('unknown metric type {}: metric={}'.format(
      cls, metric_config))


def _keys_for_metric(
    metric_name: str,
    spec: config_pb2.MetricsSpec,
    aggregation_type: Optional[metric_types.AggregationType],
    sub_keys: List[Optional[metric_types.SubKey]],
    example_weights: List[Optional[bool]],
) -> Iterator[metric_types.MetricKey]:
  """Yields all non-diff keys for a specific metric name.

  One key is produced for every combination of the spec's model names, the
  spec's output names, the given sub keys and the given example weighted
  flags. An empty model name / output name ('') is used when the spec does
  not set any.

  Args:
    metric_name: Name of the metric.
    spec: Metrics spec providing the model and output names.
    aggregation_type: Optional aggregation type to set on every key.
    sub_keys: Sub keys to create keys for.
    example_weights: Example weighted flags to create keys for.
  """
  yield from (
      metric_types.MetricKey(
          name=metric_name,
          model_name=model_name,
          output_name=output_name,
          sub_key=sub_key,
          aggregation_type=aggregation_type,
          example_weighted=example_weighted)
      for model_name in spec.model_names or ['']
      for output_name in spec.output_names or ['']
      for sub_key in sub_keys
      for example_weighted in example_weights)


def keys_and_metrics_from_specs(
    eval_config: config_pb2.EvalConfig,
    metrics_specs: Iterable[config_pb2.MetricsSpec]
) -> Iterator[Tuple[metric_types.MetricKey, config_pb2.MetricConfig,
                    metric_types.Metric]]:
  """Yields key, config, instance tuples for each non-diff metric in specs.

  Args:
    eval_config: Eval config used to determine the example weight options.
    metrics_specs: Metrics specs to create the keys and metric instances for.
  """
  registered_classes = metric_types.registered_metrics()
  for spec in metrics_specs:
    sub_keys_by_aggregation = _create_sub_keys(spec)
    for aggregation_type, sub_keys in sub_keys_by_aggregation.items():
      for metric_config in spec.metrics:
        instance = metric_instance(metric_config, registered_classes)
        metric_name = instance.name
        weight_options = _example_weight_options(eval_config, spec)
        metric_keys = _keys_for_metric(metric_name, spec, aggregation_type,
                                       sub_keys, weight_options)
        for key in metric_keys:
          yield key, metric_config, instance


def metric_keys_to_skip_for_confidence_intervals(
    metrics_specs: Iterable[config_pb2.MetricsSpec],
    eval_config: config_pb2.EvalConfig) -> FrozenSet[metric_types.MetricKey]:
  """Returns metric keys not to be displayed with confidence intervals.

  Args:
    metrics_specs: Metrics specs to collect the keys from.
    eval_config: Eval config.

  Returns:
    The keys of all metrics whose instance has a falsy
    compute_confidence_interval attribute.
  """
  # A metric that does not define compute_confidence_interval is treated as if
  # the attribute were True, i.e. its keys are not skipped.
  return frozenset(
      key for key, _, instance in keys_and_metrics_from_specs(
          eval_config, metrics_specs)
      if not getattr(instance, 'compute_confidence_interval', True))


# A (slice, threshold) pair. The slice is optional: when it is not set (None)
# the threshold matches all slices.
_SliceAndThreshold = Tuple[
    Optional[Union[config_pb2.SlicingSpec, config_pb2.CrossSlicingSpec]],
    Union[config_pb2.GenericChangeThreshold, config_pb2.GenericValueThreshold],
]


def metric_thresholds_from_metrics_specs(
    metrics_specs: Iterable[config_pb2.MetricsSpec],
    eval_config: Optional[config_pb2.EvalConfig] = None
) -> Dict[metric_types.MetricKey, Iterable[_SliceAndThreshold]]:
  """Returns thresholds associated with given metrics specs.

  Thresholds are collected both from the spec-level settings (thresholds,
  per_slice_thresholds and cross_slice_thresholds keyed by metric name) and
  from the settings attached to the individual MetricConfigs. Value thresholds
  are stored under the metric key, change thresholds under the corresponding
  diff key. Duplicate (key, slice, threshold) combinations are only added once.

  Args:
    metrics_specs: Metrics specs to collect the thresholds from. May be any
      iterable, including a one-shot iterator.
    eval_config: Optional eval config. An empty EvalConfig is used if None.

  Returns:
    Dict of (slice, threshold) tuples keyed by metric key. The slice is None
    for thresholds that are not tied to a slice.
  """
  if eval_config is None:
    eval_config = config_pb2.EvalConfig()
  # The specs are traversed twice below (spec-level thresholds, then
  # MetricConfig-level thresholds). Materialize them so that a one-shot
  # iterator is not already exhausted on the second pass.
  metrics_specs = list(metrics_specs)
  result = collections.defaultdict(list)
  # (key, hashable slice spec, serialized threshold) combinations already
  # added to result.
  seen = set()

  def add_if_not_exists(
      key: metric_types.MetricKey,
      slice_spec: Optional[Union[config_pb2.SlicingSpec,
                                 config_pb2.CrossSlicingSpec]],
      threshold: Union[config_pb2.GenericChangeThreshold,
                       config_pb2.GenericValueThreshold]):
    """Adds value to results if it doesn't already exist."""
    hashable_slice_spec = None
    if slice_spec:
      hashable_slice_spec = slicer.deserialize_slice_spec(slice_spec)
    # Note that hashing by SerializeToString() is only safe if used within the
    # same process.
    threshold_hash = threshold.SerializeToString()
    entry = (key, hashable_slice_spec, threshold_hash)
    if entry not in seen:
      seen.add(entry)
      result[key].append((slice_spec, threshold))

  def add_threshold(key: metric_types.MetricKey,
                    slice_spec: Union[Optional[config_pb2.SlicingSpec],
                                      Optional[config_pb2.CrossSlicingSpec]],
                    threshold: config_pb2.MetricThreshold):
    """Adds thresholds to results."""
    if threshold.HasField('value_threshold'):
      add_if_not_exists(key, slice_spec, threshold.value_threshold)
    if threshold.HasField('change_threshold'):
      # Change thresholds are registered under the diff version of the key.
      key = key.make_diff_key()
      add_if_not_exists(key, slice_spec, threshold.change_threshold)

  for spec in metrics_specs:
    for aggregation_type, sub_keys in _create_sub_keys(spec).items():
      # Add thresholds for metrics computed in-graph.
      for metric_name, threshold in spec.thresholds.items():
        for key in _keys_for_metric(metric_name, spec, aggregation_type,
                                    sub_keys, [None]):
          add_threshold(key, None, threshold)
      for metric_name, per_slice_thresholds in (
          spec.per_slice_thresholds.items()):
        for key in _keys_for_metric(metric_name, spec, aggregation_type,
                                    sub_keys, [None]):
          for per_slice_threshold in per_slice_thresholds.thresholds:
            for slice_spec in per_slice_threshold.slicing_specs:
              add_threshold(key, slice_spec, per_slice_threshold.threshold)
      for metric_name, cross_slice_thresholds in (
          spec.cross_slice_thresholds.items()):
        for key in _keys_for_metric(metric_name, spec, aggregation_type,
                                    sub_keys, [None]):
          for cross_slice_threshold in cross_slice_thresholds.thresholds:
            for cross_slice_spec in cross_slice_threshold.cross_slicing_specs:
              add_threshold(key, cross_slice_spec,
                            cross_slice_threshold.threshold)

  # Add thresholds for post export metrics defined in MetricConfigs.
  for key, metric_config, _ in keys_and_metrics_from_specs(
      eval_config, metrics_specs):
    if metric_config.HasField('threshold'):
      add_threshold(key, None, metric_config.threshold)
    for per_slice_threshold in metric_config.per_slice_thresholds:
      for slice_spec in per_slice_threshold.slicing_specs:
        add_threshold(key, slice_spec, per_slice_threshold.threshold)
    for cross_slice_threshold in metric_config.cross_slice_thresholds:
      for cross_slice_spec in cross_slice_threshold.cross_slicing_specs:
        add_threshold(key, cross_slice_spec, cross_slice_threshold.threshold)

  return result


def to_computations(
    metrics_specs: List[config_pb2.MetricsSpec],
    eval_config: Optional[config_pb2.EvalConfig] = None,
    schema: Optional[schema_pb2.Schema] = None
) -> metric_types.MetricComputations:
  """Returns computations associated with given metrics specs.

  The metrics of every spec are first split into TF (keras metric / loss) and
  TFMA metrics and instantiated. The computations of the TF specs are added
  first, then those of the TFMA specs and finally the aggregation based
  computations (macro / weighted macro averaging and output averaging).

  Args:
    metrics_specs: Metrics specs to create the computations for.
    eval_config: Optional eval config. It is passed on unchanged to the helper
      functions and to the aggregation computations.
    schema: Optional schema, passed on to the processing of the TFMA specs.

  Returns:
    List of computations for all of the given specs.

  Raises:
    ValueError: If the output_weights of a spec use an output name that is not
      one of the spec's output names.
  """
  computations = []

  #
  # Split into TF metrics and TFMA metrics
  #

  # Dict[Text, Type[tf_keras.metrics.Metric]]
  tf_metric_classes = {}  # class_name -> class
  # Dict[Text, Type[tf_keras.losses.Loss]]
  tf_loss_classes = {}  # class_name -> class
  # List[metric_types.MetricsSpec]
  tf_metrics_specs = []
  # Dict[Text, Type[metric_types.Metric]]
  tfma_metric_classes = metric_types.registered_metrics()  # class_name -> class
  # List[metric_types.MetricsSpec]
  tfma_metrics_specs = []
  #
  # Note: Lists are used instead of Dicts for the following items because
  # protos are not hashable.
  #
  # List[List[_TFOrTFMAMetricOrLoss]] (offsets align with metrics_specs).
  per_spec_metric_instances = []
  # List[List[MetricConfig]] (offsets align with metrics_specs).
  per_spec_metric_configs = []
  # List[List[_TFMetricOrLoss]] (offsets align with tf_metrics_specs).
  per_tf_spec_metric_instances = []
  # List[List[metric_types.Metric]]] (offsets align with tfma_metrics_specs).
  per_tfma_spec_metric_instances = []
  for spec in metrics_specs:
    # Start from two copies of the spec with the metrics removed. Each metric
    # config is then added to either the TF copy or the TFMA copy.
    tf_spec = config_pb2.MetricsSpec()
    tf_spec.CopyFrom(spec)
    del tf_spec.metrics[:]
    tfma_spec = config_pb2.MetricsSpec()
    tfma_spec.CopyFrom(spec)
    del tfma_spec.metrics[:]
    for metric in spec.metrics:
      if metric.class_name in tfma_metric_classes:
        tfma_spec.metrics.append(metric)
      elif not metric.module:
        # Not a known TFMA class and no module given: handled as a TF metric.
        tf_spec.metrics.append(metric)
      else:
        # Custom class: import it and classify it by its base class. The class
        # is recorded so that it can be found again when deserializing.
        cls = getattr(importlib.import_module(metric.module), metric.class_name)
        if issubclass(cls, tf_keras.metrics.Metric):
          tf_metric_classes[metric.class_name] = cls
          tf_spec.metrics.append(metric)
        elif issubclass(cls, tf_keras.losses.Loss):
          tf_loss_classes[metric.class_name] = cls
          tf_spec.metrics.append(metric)
        else:
          # Anything that is not a keras metric or loss is handled as a TFMA
          # metric.
          tfma_metric_classes[metric.class_name] = cls
          tfma_spec.metrics.append(metric)

    # Instances / configs of this spec, TF metrics first and TFMA metrics
    # second. Both lists are kept in the same order.
    metric_instances = []
    metric_configs = []
    if tf_spec.metrics:
      tf_metrics_specs.append(tf_spec)
      tf_metric_instances = []
      for m in tf_spec.metrics:
        # To distinguish losses from metrics, losses are required to set the
        # module name.
        if m.module == _TF_LOSSES_MODULE:
          tf_metric_instances.append(_deserialize_tf_loss(m, tf_loss_classes))
        else:
          tf_metric_instances.append(
              _deserialize_tf_metric(m, tf_metric_classes))
      per_tf_spec_metric_instances.append(tf_metric_instances)
      metric_instances.extend(tf_metric_instances)
      metric_configs.extend(tf_spec.metrics)
    if tfma_spec.metrics:
      tfma_metrics_specs.append(tfma_spec)
      tfma_metric_instances = [
          _deserialize_tfma_metric(m, tfma_metric_classes)
          for m in tfma_spec.metrics
      ]
      per_tfma_spec_metric_instances.append(tfma_metric_instances)
      metric_instances.extend(tfma_metric_instances)
      metric_configs.extend(tfma_spec.metrics)
    per_spec_metric_instances.append(metric_instances)
    per_spec_metric_configs.append(metric_configs)

  # Process TF specs
  computations.extend(
      _process_tf_metrics_specs(tf_metrics_specs, per_tf_spec_metric_instances,
                                eval_config))

  # Process TFMA specs
  computations.extend(
      _process_tfma_metrics_specs(tfma_metrics_specs,
                                  per_tfma_spec_metric_instances, eval_config,
                                  schema))

  # Process aggregation based metrics (output aggregation and macro averaging).
  # Note that processing of TF and TFMA specs were setup to create the binarized
  # metrics that macro averaging depends on.
  for i, spec in enumerate(metrics_specs):
    for example_weighted in _example_weight_options(eval_config, spec):
      for aggregation_type, sub_keys in _create_sub_keys(spec).items():
        output_names = spec.output_names or ['']
        output_weights = dict(spec.output_weights)
        # Every output that is given a weight must be an output of the spec.
        if not set(output_weights).issubset(output_names):
          raise ValueError(
              'one or more output_names used in output_weights does not exist: '
              'output_names={}, output_weights={}'.format(
                  output_names, output_weights))
        for model_name in spec.model_names or ['']:
          for sub_key in sub_keys:
            # Only the instances are used here; the configs are not needed.
            for metric, _ in zip(per_spec_metric_instances[i],
                                 per_spec_metric_configs[i]):
              if (aggregation_type and
                  (aggregation_type.macro_average or
                   aggregation_type.weighted_macro_average)):
                class_weights = _class_weights(spec) or {}
                for output_name in output_names:
                  macro_average_sub_keys = _macro_average_sub_keys(
                      sub_key, class_weights)
                  if aggregation_type.macro_average:
                    computations.extend(
                        aggregation.macro_average(
                            metric.get_config()['name'],
                            sub_keys=macro_average_sub_keys,
                            eval_config=eval_config,
                            model_name=model_name,
                            output_name=output_name,
                            sub_key=sub_key,
                            class_weights=class_weights,
                            example_weighted=example_weighted))
                  elif aggregation_type.weighted_macro_average:
                    computations.extend(
                        aggregation.weighted_macro_average(
                            metric.get_config()['name'],
                            sub_keys=macro_average_sub_keys,
                            eval_config=eval_config,
                            model_name=model_name,
                            output_name=output_name,
                            sub_key=sub_key,
                            class_weights=class_weights,
                            example_weighted=example_weighted))
              # Output averaging is only added when output weights are set.
              if output_weights:
                computations.extend(
                    aggregation.output_average(
                        metric.get_config()['name'],
                        output_weights=output_weights,
                        eval_config=eval_config,
                        model_name=model_name,
                        sub_key=sub_key,
                        example_weighted=example_weighted))

  return computations


def _process_tf_metrics_specs(
    tf_metrics_specs: List[config_pb2.MetricsSpec],
    per_tf_spec_metric_instances: List[List[_TFMetricOrLoss]],
    eval_config: config_pb2.EvalConfig) -> metric_types.MetricComputations:
  """Processes list of TF MetricsSpecs to create computations.

  Metric instances are grouped by the unique combination of model name, sub
  key, aggregation type and class weights, then split into example-weighted
  and unweighted sets keyed by output name. One call to
  tf_metric_wrapper.tf_metric_computations is made per non-empty group.

  Args:
    tf_metrics_specs: Specs whose metrics are TF metrics or losses.
    per_tf_spec_metric_instances: Metric/loss instances for each spec; the
      entry at index i belongs to tf_metrics_specs[i].
    eval_config: Eval config, forwarded to the example weight option lookup
      and to the TF metric computations.

  Returns:
    List of computations for all of the grouped metrics.
  """

  # Wrap args into structure that is hashable so we can track unique arg sets.
  # class_weights is stored as a tuple of (class_id, weight) pairs because a
  # dict could not be part of a dict key.
  class UniqueArgs(
      NamedTuple('UniqueArgs',
                 [('model_name', str),
                  ('sub_key', Optional[metric_types.SubKey]),
                  ('aggregation_type', Optional[metric_types.AggregationType]),
                  ('class_weights', Tuple[Tuple[int, float], ...])])):
    pass

  def _create_private_tf_metrics(
      metrics: List[_TFMetricOrLoss]) -> List[_TFMetricOrLoss]:
    """Creates private versions of TF metrics.

    Args:
      metrics: Metric and/or loss instances to copy.

    Returns:
      New list with a private copy of each input, in the same order.
    """
    result = []
    for m in metrics:
      # Anything that is not a keras Metric is treated as a loss.
      if isinstance(m, tf_keras.metrics.Metric):
        result.append(_private_tf_metric(m))
      else:
        result.append(_private_tf_loss(m))
    return result

  #
  # Group TF metrics by the subkeys, models and outputs. This is done in reverse
  # because model and subkey processing is done outside of TF and so each unique
  # sub key combination needs to be run through a separate model instance. Note
  # that output_names are handled by the tf_metric_computation since all the
  # outputs are batch calculated in a single model evaluation call.
  #

  # UniqueArgs -> (weighted, unweighted), each output_name -> [_TFMetricOrLoss].
  # Entries are always created explicitly below before being indexed, so the
  # defaultdict's own factory is never relied upon.
  metrics_by_unique_args = collections.defaultdict(dict)
  for i, spec in enumerate(tf_metrics_specs):
    metrics = per_tf_spec_metric_instances[i]
    sub_keys_by_aggregation_type = _create_sub_keys(spec)
    # Keep track of metrics that can be shared between macro averaging and
    # binarization. For example, if macro averaging is being performed over 10
    # classes and 5 of the classes are also being binarized, then those 5
    # classes can be re-used by the macro averaging calculation. The remaining
    # 5 classes need to be added as private metrics since those classes were
    # not requested but are still needed for the macro averaging calculation.
    if None in sub_keys_by_aggregation_type:
      shared_sub_keys = set(sub_keys_by_aggregation_type[None])
    else:
      shared_sub_keys = set()
    for aggregation_type, sub_keys in sub_keys_by_aggregation_type.items():
      if aggregation_type:
        # Sorted so that equal weight dicts always produce the same key.
        class_weights = tuple(sorted((_class_weights(spec) or {}).items()))
      else:
        class_weights = ()
      is_macro = (
          aggregation_type and (aggregation_type.macro_average or
                                aggregation_type.weighted_macro_average))
      for parent_sub_key in sub_keys:
        if is_macro:
          # Macro averages are built from one metric per underlying sub key.
          child_sub_keys = _macro_average_sub_keys(parent_sub_key,
                                                   _class_weights(spec))
        else:
          child_sub_keys = [parent_sub_key]
        for output_name in spec.output_names or ['']:
          for sub_key in child_sub_keys:
            if is_macro and sub_key not in shared_sub_keys:
              # Create private metrics for all non-shared metrics.
              instances = _create_private_tf_metrics(metrics)
            else:
              instances = metrics
            for model_name in spec.model_names or ['']:
              # For macro averaging the per-sub-key metrics are keyed without
              # an aggregation type or class weights, i.e. they are grouped
              # exactly like plain (binarized) metrics for that sub key.
              unique_args = UniqueArgs(
                  model_name, sub_key,
                  aggregation_type if not is_macro else None,
                  class_weights if not is_macro else ())
              if unique_args not in metrics_by_unique_args:
                # Tuple of weighted and unweighted metrics by output
                metrics_by_unique_args[unique_args] = (
                    collections.defaultdict(list),
                    collections.defaultdict(list))
              for instance in instances:
                for example_weighted in _example_weight_options(
                    eval_config, spec):
                  # Index 0 holds example-weighted metrics, index 1 unweighted.
                  if example_weighted:
                    metrics_by_unique_args[unique_args][0][output_name].append(
                        instance)
                  else:
                    metrics_by_unique_args[unique_args][1][output_name].append(
                        instance)

  # Convert Unique args and outputs to calls to compute TF metrics
  result = []
  for args, metrics_by_output in metrics_by_unique_args.items():
    # Turn the hashable (class_id, weight) pairs back into a dict; an empty
    # tuple means no class weights apply.
    class_weights = dict(args.class_weights) if args.class_weights else None
    weighted_metrics_by_output, unweighted_metrics_by_output = metrics_by_output
    # Only emit computations for the weighting variants that have metrics.
    if weighted_metrics_by_output:
      result.extend(
          tf_metric_wrapper.tf_metric_computations(
              weighted_metrics_by_output,
              eval_config=eval_config,
              model_name=args.model_name,
              sub_key=args.sub_key,
              aggregation_type=args.aggregation_type,
              class_weights=class_weights,
              example_weighted=True))
    if unweighted_metrics_by_output:
      result.extend(
          tf_metric_wrapper.tf_metric_computations(
              unweighted_metrics_by_output,
              eval_config=eval_config,
              model_name=args.model_name,
              sub_key=args.sub_key,
              aggregation_type=args.aggregation_type,
              class_weights=class_weights,
              example_weighted=False))
  return result


def _process_tfma_metrics_specs(
    tfma_metrics_specs: List[config_pb2.MetricsSpec],
    per_tfma_spec_metric_instances: List[List[metric_types.Metric]],
    eval_config: config_pb2.EvalConfig,
    schema: Optional[schema_pb2.Schema]) -> metric_types.MetricComputations:
  """Builds computations for TFMA metrics from their MetricsSpecs.

  Metrics are grouped by their serialized MetricConfig so that identical
  configs share the first metric instance seen for that config. For every
  (config, spec) pairing, computations are then requested once per sub-key
  group and per example weighting option.

  Args:
    tfma_metrics_specs: Specs whose metrics are TFMA metrics.
    per_tfma_spec_metric_instances: Metric instances for each spec; the entry
      at index i is paired positionally with tfma_metrics_specs[i].metrics.
    eval_config: Eval config forwarded to the metric computations.
    schema: Optional schema forwarded to the metric computations.

  Returns:
    List of computations gathered from all of the metrics.
  """
  # Both dicts are keyed by the serialized MetricConfig. Note that hashing by
  # SerializeToString() is only safe if used within the same process.
  specs_by_config = {}
  metric_by_config = {}
  for spec_index, spec in enumerate(tfma_metrics_specs):
    spec_instances = per_tfma_spec_metric_instances[spec_index]
    for metric_config, metric in zip(spec.metrics, spec_instances):
      config_key = metric_config.SerializeToString()
      if config_key not in specs_by_config:
        # The first instance seen for a config serves every spec using it.
        metric_by_config[config_key] = metric
        specs_by_config[config_key] = []
      specs_by_config[config_key].append(spec)

  result = []
  for config_key, specs in specs_by_config.items():
    metric = metric_by_config[config_key]
    for spec in specs:
      sub_keys_by_aggregation_type = _create_sub_keys(spec)
      # Sub-keys requested via binarization can be re-used by macro averaging.
      # For example, when macro averaging over 10 classes of which 5 are also
      # binarized, only the other 5 need private metrics: they were not
      # requested by the user but are still needed to compute the average.
      shared_sub_keys = set(sub_keys_by_aggregation_type.get(None, ()))
      for aggregation_type, sub_keys in sub_keys_by_aggregation_type.items():
        class_weights = _class_weights(spec) if aggregation_type else None
        is_macro = (
            aggregation_type and (aggregation_type.macro_average or
                                  aggregation_type.weighted_macro_average))
        if is_macro:
          private_sub_keys = []
          for sub_key in sub_keys:
            private_sub_keys.extend(
                key for key in _macro_average_sub_keys(sub_key, class_weights)
                if key not in shared_sub_keys)
          if not private_sub_keys:
            # Everything needed is already covered by the shared sub-keys.
            continue
          # The per-sub-key inputs to a macro average are computed as plain
          # metrics: no aggregation type and no class weights.
          call_sub_keys = private_sub_keys
          call_aggregation_type = None
          call_class_weights = None
          instance = _private_tfma_metric(metric)
        else:
          call_sub_keys = sub_keys
          call_aggregation_type = aggregation_type
          call_class_weights = class_weights
          instance = metric
        for example_weighted in _example_weight_options(eval_config, spec):
          result.extend(
              instance.computations(
                  eval_config=eval_config,
                  schema=schema,
                  model_names=list(spec.model_names) or [''],
                  output_names=list(spec.output_names) or [''],
                  sub_keys=call_sub_keys,
                  aggregation_type=call_aggregation_type,
                  class_weights=call_class_weights or None,
                  example_weighted=example_weighted,
                  query_key=spec.query_key))
  return result


def _create_sub_keys(
    spec: config_pb2.MetricsSpec
) -> Dict[Optional[metric_types.AggregationType],
          List[Optional[metric_types.SubKey]]]:
  """Builds the sub keys requested by a spec, grouped by aggregation type.

  Binarization sub keys (class ids, then k values, then top_k values) are
  stored under the key None. Aggregation sub keys are stored under the spec's
  aggregation type, using one top_k sub key per aggregate.top_k_list entry or
  [None] when that list is empty.

  Args:
    spec: Metrics spec to read binarize/aggregate options from.

  Returns:
    Dict of aggregation type (None for binarization) to sub keys. When the
    spec requests neither, {None: [None]} is returned.
  """
  result = {}
  if spec.HasField('binarize'):
    binarize = spec.binarize
    binarized_sub_keys = (
        [metric_types.SubKey(class_id=v) for v in binarize.class_ids.values] +
        [metric_types.SubKey(k=v) for v in binarize.k_list.values] +
        [metric_types.SubKey(top_k=v) for v in binarize.top_k_list.values])
    if binarized_sub_keys:
      result[None] = binarized_sub_keys
  if spec.HasField('aggregate'):
    aggregated_sub_keys = [
        metric_types.SubKey(top_k=v) for v in spec.aggregate.top_k_list.values
    ]
    # NOTE(review): _aggregation_type returns None when no averaging flag is
    # set, in which case this assignment replaces any binarization entry
    # stored above - confirm that is intended.
    result[_aggregation_type(spec)] = aggregated_sub_keys or [None]
  return result or {None: [None]}


def _macro_average_sub_keys(
    sub_key: Optional[metric_types.SubKey],
    class_weights: Dict[int, float]) -> Iterable[metric_types.SubKey]:
  """Expands a macro average sub-key into the sub-keys it is computed from.

  Args:
    sub_key: SubKey associated with macro_average or weighted_macro_average.
      When not set, the average is taken over the classes in class_weights.
    class_weights: Class weights associated with sub-key. Only the class ids
      (the dict keys) are used here.

  Returns:
    One class_id sub-key per class in class_weights when no sub_key is given,
    otherwise one k sub-key for each k from 1 to sub_key.top_k inclusive.

  Raises:
    ValueError: If invalid sub-key passed or class weights required but not
      passed.
  """
  if sub_key:
    # Only top_k sub-keys can be macro averaged.
    if not sub_key.top_k:
      raise ValueError('invalid sub_key for performing macro averaging: '
                       'sub_key={}'.format(sub_key))
    return [metric_types.SubKey(k=k) for k in range(1, sub_key.top_k + 1)]

  # Without a sub-key the set of classes can only come from the class weights.
  if not class_weights:
    raise ValueError(
        'class_weights are required in order to compute macro average over '
        'all classes: sub_key={}, class_weights={}'.format(
            sub_key, class_weights))
  return [metric_types.SubKey(class_id=class_id) for class_id in class_weights]


def _aggregation_type(
    spec: config_pb2.MetricsSpec) -> Optional[metric_types.AggregationType]:
  """Returns the AggregationType selected by the spec's aggregate options.

  The flags are checked in the order micro_average, macro_average,
  weighted_macro_average and the first one that is set wins.

  Args:
    spec: Metrics spec to read the aggregate options from.

  Returns:
    AggregationType with the matching flag set to True, or None when none of
    the averaging flags are set.
  """
  averaging_flags = ('micro_average', 'macro_average', 'weighted_macro_average')
  for flag in averaging_flags:
    if getattr(spec.aggregate, flag):
      return metric_types.AggregationType(**{flag: True})
  return None


def _class_weights(spec: config_pb2.MetricsSpec) -> Optional[Dict[int, float]]:
  """Returns the class weights configured in the spec's aggregate options.

  Args:
    spec: Metrics spec to read the aggregate options from.

  Returns:
    Dict copy of aggregate.class_weights, or None when there are no class
    weights or when top_k_list is used.

  Raises:
    ValueError: If class_weights are set together with top_k_list.
  """
  aggregate = spec.aggregate
  if not aggregate.HasField('top_k_list'):
    # An empty mapping is reported as None rather than {}.
    return dict(aggregate.class_weights) or None
  if aggregate.class_weights:
    raise ValueError('class_weights are not supported when top_k_list used: '
                     'spec={}'.format(spec))
  return None


def _is_supported_tf_metric(tf_metric: _TFMetricOrLoss) -> bool:
  """Checks whether a TF metric can be replaced by a TFMA implementation.

  Args:
    tf_metric: TF metric (or loss) instance to check.

  Returns:
    True if the metric's class name is a registered TFMA metric and every
    setting listed in _UNSUPPORTED_TF_SETTINGS that appears in the metric's
    config has one of the values listed for it there.
  """
  metric_cls_name = tf_metric.__class__.__name__
  if not metric_types.is_registered_metric(metric_cls_name):
    return False
  metric_cfg = tf_metric.get_config()
  for cls_name, settings in _UNSUPPORTED_TF_SETTINGS.items():
    # An empty class name means the settings apply to every metric class.
    if cls_name and cls_name != metric_cls_name:
      continue
    if any(param in metric_cfg and metric_cfg[param] not in accepted_values
           for param, accepted_values in settings.items()):
      return False
  return True


def _remove_unsupported_tf_settings(
    metric_config: config_pb2.MetricConfig) -> config_pb2.MetricConfig:
  """Returns a copy of the metric config without unsupported TF settings.

  Drops the TF config settings that TFMA only supports default values for
  because the TFMA implementation of the metric does not take those
  parameters.

  Args:
    metric_config: Metric config.

  Returns:
    New metric config with the same class name and with the remaining settings
    serialized as JSON with sorted keys.
  """
  remaining = _metric_config(metric_config.config)
  for cls_name, settings in _UNSUPPORTED_TF_SETTINGS.items():
    # An empty class name means the settings apply to every metric class.
    if cls_name and cls_name != metric_config.class_name:
      continue
    for param in settings:
      remaining.pop(param, None)

  return config_pb2.MetricConfig(
      class_name=metric_config.class_name,
      config=json.dumps(remaining, sort_keys=True))


def _metric_config(cfg: str) -> Dict[str, Any]:
  """Returns deserializable metric config from JSON string."""
  if not cfg:
    json_cfg = '{}'
  elif cfg[0] != '{':
    json_cfg = '{' + cfg + '}'
  else:
    json_cfg = cfg
  return json.loads(json_cfg)


def _maybe_add_name_to_config(cfg: Dict[str, Any],
                              class_name: str) -> Dict[str, Any]:
  """Adds default name field to metric config if not present."""
  if 'name' not in cfg:
    # Use snake_case version of class name as default name.
    intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', class_name)
    cfg['name'] = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower()
  return cfg


def _tf_class_and_config(
    metric_config: config_pb2.MetricConfig) -> Tuple[str, Dict[str, Any]]:
  """Extracts the TF class name and settings dict from a metric config.

  Args:
    metric_config: Metric config to read class_name and config from.

  Returns:
    Tuple of (class name, settings dict). The settings always contain a 'name'.
  """
  settings = _metric_config(metric_config.config)
  # The same metric type may be used for different keys when multi-class
  # metrics are used (e.g. AUC for class0, class1, etc). TF tries to generate
  # unique metric names even though these metrics are already unique within a
  # MetricKey. As a workaround, a default name is added here whenever the
  # config does not set one.
  settings = _maybe_add_name_to_config(settings, metric_config.class_name)
  return metric_config.class_name, settings


def _serialize_tf_metric(
    metric: tf_keras.metrics.Metric,
) -> config_pb2.MetricConfig:
  """Converts a TF metric instance into a MetricConfig.

  Args:
    metric: Keras metric to serialize.

  Returns:
    MetricConfig holding the serialized class name, the settings as JSON with
    sorted keys, and the class's module. The module is only recorded when the
    class's registered keras name equals its plain class name; otherwise it is
    left unset.
  """
  metric_cls = metric.__class__
  serialized = metric_util.serialize_metric(metric, use_legacy_format=True)
  registered_under_class_name = (
      tf_keras.utils.get_registered_name(metric_cls) == metric_cls.__name__)
  return config_pb2.MetricConfig(
      class_name=serialized['class_name'],
      module=metric_cls.__module__ if registered_under_class_name else None,
      config=json.dumps(serialized['config'], sort_keys=True),
  )


def _deserialize_tf_metric(
    metric_config: config_pb2.MetricConfig,
    custom_objects: Dict[str, Type[tf_keras.metrics.Metric]],
) -> tf_keras.metrics.Metric:
  """Creates a tf_keras.metrics metric from a MetricConfig.

  Args:
    metric_config: Config naming the metric class and its settings.
    custom_objects: Custom classes by name, made available to keras while the
      metric is deserialized.

  Returns:
    The deserialized metric instance.
  """
  class_name, settings = _tf_class_and_config(metric_config)
  serialized = {'class_name': class_name, 'config': settings}
  with tf_keras.utils.custom_object_scope(custom_objects):
    return metric_util.deserialize_metric(serialized, use_legacy_format=True)


def _private_tf_metric(
    metric: tf_keras.metrics.Metric,
) -> tf_keras.metrics.Metric:
  """Creates a copy of the metric whose name is marked private.

  The metric is round-tripped through keras serialization and its name is
  given a leading underscore unless it already starts with one.

  Args:
    metric: Metric to copy.

  Returns:
    New metric instance built from the renamed config.
  """
  # NOTE(review): serialization uses serialize_metric's default format while
  # deserialization passes use_legacy_format=True - confirm this is intended.
  serialized = metric_util.serialize_metric(metric)
  settings = serialized['config']
  if not settings['name'].startswith('_'):
    settings['name'] = '_' + settings['name']
  metric_cls = metric.__class__
  # Register the metric's own class so that custom metrics can be rebuilt.
  with tf_keras.utils.custom_object_scope({metric_cls.__name__: metric_cls}):
    return metric_util.deserialize_metric(serialized, use_legacy_format=True)


def _serialize_tf_loss(loss: tf_keras.losses.Loss) -> config_pb2.MetricConfig:
  """Converts a TF loss instance into a MetricConfig.

  Args:
    loss: Keras loss to serialize.

  Returns:
    MetricConfig holding the serialized class name, the loss class's module
    and the settings as JSON with sorted keys.
  """
  serialized = metric_util.serialize_loss(loss, use_legacy_format=True)
  settings_json = json.dumps(serialized['config'], sort_keys=True)
  return config_pb2.MetricConfig(
      class_name=serialized['class_name'],
      module=loss.__class__.__module__,
      config=settings_json)


def _deserialize_tf_loss(
    metric_config: config_pb2.MetricConfig,
    custom_objects: Dict[str, Type[tf_keras.losses.Loss]],
) -> tf_keras.losses.Loss:
  """Creates a tf_keras.losses loss from a MetricConfig.

  Args:
    metric_config: Config naming the loss class and its settings.
    custom_objects: Custom classes by name, made available to keras while the
      loss is deserialized.

  Returns:
    The deserialized loss instance.
  """
  class_name, settings = _tf_class_and_config(metric_config)
  serialized = {'class_name': class_name, 'config': settings}
  with tf_keras.utils.custom_object_scope(custom_objects):
    return metric_util.deserialize_loss(serialized, use_legacy_format=True)


def _private_tf_loss(loss: tf_keras.losses.Loss) -> tf_keras.losses.Loss:
  """Creates a copy of the loss whose name is marked private.

  The loss is round-tripped through keras serialization and its name is given
  a leading underscore unless it already starts with one.

  Args:
    loss: Loss to copy.

  Returns:
    New loss instance built from the renamed config.
  """
  # NOTE(review): serialization uses serialize_loss's default format while
  # deserialization passes use_legacy_format=True - confirm this is intended.
  serialized = metric_util.serialize_loss(loss)
  settings = serialized['config']
  if not settings['name'].startswith('_'):
    settings['name'] = '_' + settings['name']
  loss_cls = loss.__class__
  # Register the loss's own class so that custom losses can be rebuilt.
  with tf_keras.utils.custom_object_scope({loss_cls.__name__: loss_cls}):
    return metric_util.deserialize_loss(serialized, use_legacy_format=True)


def _serialize_tfma_metric(
    metric: metric_types.Metric) -> config_pb2.MetricConfig:
  """Converts a TFMA metric instance into a MetricConfig.

  Args:
    metric: TFMA metric to serialize.

  Returns:
    MetricConfig holding the serialized class name and the settings as JSON
    with sorted keys.
  """
  serialized = metric_util.serialize_keras_object(metric)
  settings_json = json.dumps(serialized['config'], sort_keys=True)
  return config_pb2.MetricConfig(
      class_name=serialized['class_name'], config=settings_json)


def _deserialize_tfma_metric(
    metric_config: config_pb2.MetricConfig,
    custom_objects: Dict[str,
                         Type[metric_types.Metric]]) -> metric_types.Metric:
  """Creates a tfma.metrics metric from a MetricConfig.

  Args:
    metric_config: Config naming the metric class and its settings.
    custom_objects: Custom classes by name, made available to keras while the
      metric is deserialized.

  Returns:
    The deserialized metric instance.
  """
  with tf_keras.utils.custom_object_scope(custom_objects):
    serialized = {
        'class_name': metric_config.class_name,
        'config': _metric_config(metric_config.config),
    }
    return metric_util.deserialize_keras_object(serialized)


def _private_tfma_metric(metric: metric_types.Metric) -> metric_types.Metric:
  """Creates a copy of the metric whose name is marked private.

  The metric is round-tripped through keras object serialization and its name
  is given a leading underscore unless it already starts with one.

  Args:
    metric: Metric to copy.

  Returns:
    New metric instance built from the renamed config.
  """
  serialized = metric_util.serialize_keras_object(metric)
  settings = serialized['config']
  if not settings['name'].startswith('_'):
    settings['name'] = '_' + settings['name']
  metric_cls = metric.__class__
  # Register the metric's own class so that custom metrics can be rebuilt.
  with tf_keras.utils.custom_object_scope({metric_cls.__name__: metric_cls}):
    return metric_util.deserialize_keras_object(serialized)