-
Notifications
You must be signed in to change notification settings - Fork 281
/
Copy pathndcg.py
299 lines (265 loc) · 10 KB
/
ndcg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""NDCG (normalized discounted cumulative gain) metric."""
from typing import Dict, Iterable, List, Optional, Tuple, Union
import apache_beam as beam
import numpy as np
from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.metrics import metric_util
from tensorflow_model_analysis.proto import config_pb2
from tensorflow_model_analysis.utils import util
# Default metric name; used as the default `name` argument of NDCG and _ndcg.
NDCG_NAME = 'ndcg'
class NDCG(metric_types.Metric):
  """Normalized discounted cumulative gain (NDCG) ranking metric.

  Computes NDCG@k for every requested top_k value. For each query, the gains
  (relevance scores) are ordered by their associated predictions and scored
  as:

    NDCG@k = (DCG@k for the given rank) / (DCG@k for the ideal rank)
    DCG@k  = sum_{i=1}^k gain_i / log_2(i+1)

  where gain_i is the gain (relevance score) of the i^th ranked response,
  indexed from 1, and the ideal rank orders the responses by decreasing gain.

  The gain of each example is read from the feature named by 'gain_key'. The
  reported NDCG@k is the average of the per-query NDCG@k values, weighted by
  the example weights.

  The top_k values may be supplied through top_k_list here, or through
  tfma.MetricsSpec.binarize.top_k_list when several top_k metrics are being
  configured together.

  Because this is a query/ranking based metric, a query_key must also be set
  in the associated tfma.MetricsSpec.
  """

  def __init__(
      self,
      gain_key: str,
      top_k_list: Optional[List[int]] = None,
      name: str = NDCG_NAME,
  ):
    """Initializes NDCG.

    Args:
      gain_key: Name of the entry in the features dictionary that stores the
        gain values.
      top_k_list: The k values to compute NDCG@k for. May instead be given via
        the tfma.MetricsSpec.binarize.top_k_list associated with the metric.
      name: Metric name.
    """
    # _ndcg is the factory that builds the metric computations; the keyword
    # arguments are forwarded to it unchanged.
    super().__init__(
        _ndcg,
        gain_key=gain_key,
        top_k_list=top_k_list,
        name=name,
    )
# Register the NDCG class with metric_types at import time (presumably so it
# can be resolved by name from a metrics config -- confirm in metric_types).
metric_types.register_metric(NDCG)
def _ndcg(
    gain_key: str,
    top_k_list: Optional[List[int]] = None,
    name: str = NDCG_NAME,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    model_names: Optional[List[str]] = None,
    output_names: Optional[List[str]] = None,
    sub_keys: Optional[List[metric_types.SubKey]] = None,
    example_weighted: bool = False,
    query_key: str = '',
) -> metric_types.MetricComputations:
  """Returns metric computations for NDCG.

  One MetricComputation is created per (model_name, output_name) pair; each
  one produces a metric key for every top_k sub key.

  Args:
    gain_key: Key of feature in features dictionary that holds gain values.
    top_k_list: Optional top_k values. Each value that is not already present
      in sub_keys is added as an extra top_k sub key.
    name: Metric name.
    eval_config: Eval config, forwarded to the combiner.
    model_names: Model names; a single unnamed model ('') is used when empty.
    output_names: Output names; a single unnamed output ('') is used when
      empty.
    sub_keys: Optional sub keys. None entries are dropped. The caller's list
      is never modified.
    example_weighted: True if example weights should be applied.
    query_key: Key of the feature that identifies the query. Required.

  Returns:
    The list of metric computations.

  Raises:
    ValueError: If query_key is empty, if no top_k sub key results from
      sub_keys and top_k_list, or if any sub key has no top_k value.
  """
  if not query_key:
    raise ValueError('a query_key is required to use NDCG metric')

  # sub_keys defaults to None, so normalize before iterating (iterating None
  # would raise TypeError). The comprehension also copies the list, which
  # keeps the append below from mutating the caller's argument.
  sub_keys = [k for k in (sub_keys or []) if k is not None]
  for k in top_k_list or []:
    if not any(sub_key.top_k == k for sub_key in sub_keys):
      sub_keys.append(metric_types.SubKey(top_k=k))
  if not sub_keys or any(sub_key.top_k is None for sub_key in sub_keys):
    raise ValueError(
        'top_k values are required to use NDCG metric: {}'.format(sub_keys)
    )

  computations = []
  for model_name in model_names if model_names else ['']:
    for output_name in output_names if output_names else ['']:
      keys = [
          metric_types.MetricKey(
              name,
              model_name=model_name,
              output_name=output_name,
              sub_key=sub_key,
              example_weighted=example_weighted,
          )
          for sub_key in sub_keys
      ]
      computations.append(
          metric_types.MetricComputation(
              keys=keys,
              # The combiner reads both the query and gain features from
              # combined_features, so request both here.
              preprocessors=[
                  metric_types.CombinedFeaturePreprocessor(
                      feature_keys=[query_key, gain_key]
                  )
              ],
              combiner=_NDCGCombiner(
                  metric_keys=keys,
                  eval_config=eval_config,
                  model_name=model_name,
                  output_name=output_name,
                  example_weighted=example_weighted,
                  query_key=query_key,
                  gain_key=gain_key,
              ),
          )
      )
  return computations
class _NDCGAccumulator:
"""NDCG accumulator."""
__slots__ = ['ndcg', 'total_weighted_examples']
def __init__(self, size: int):
self.ndcg = [0.0] * size
self.total_weighted_examples = 0.0
class _NDCGCombiner(beam.CombineFn):
  """Computes NDCG (normalized discounted cumulative gain).

  Every input element is treated as one ranked list (one query). For each
  metric key the combiner accumulates NDCG@top_k * example_weight together
  with the sum of example weights, and finally reports their ratio, i.e. the
  example-weighted mean NDCG@top_k.
  """

  def __init__(
      self,
      metric_keys: List[metric_types.MetricKey],
      eval_config: Optional[config_pb2.EvalConfig],
      model_name: str,
      output_name: str,
      example_weighted: bool,
      query_key: str,
      gain_key: str,
  ):
    """Initialize.

    Args:
      metric_keys: Metric keys. Each key is expected to carry a sub_key with
        a top_k value (checked in add_input).
      eval_config: Eval config.
      model_name: Model name.
      output_name: Output name.
      example_weighted: True if example weights should be applied.
      query_key: Query key.
      gain_key: Key of feature in features dictionary that holds gain values.
    """
    self._metric_keys = metric_keys
    self._eval_config = eval_config
    self._model_name = model_name
    self._output_name = output_name
    self._example_weighted = example_weighted
    self._query_key = query_key
    self._gain_key = gain_key

  def _query(
      self, element: metric_types.StandardMetricInputs
  ) -> Union[float, int, str]:
    """Returns the single query value shared by all examples in element.

    Raises:
      ValueError: If the query feature is empty or holds differing values.
    """
    query = util.get_by_keys(
        element.combined_features, [self._query_key]
    ).flatten()
    if query.size == 0 or not np.all(query == query[0]):
      raise ValueError(
          'missing query value or not all values are the same: value={}, '
          'metric_keys={}, StandardMetricInputs={}'.format(
              query, self._metric_keys, element
          )
      )
    return query[0]

  def _to_gains_example_weight(
      self, element: metric_types.StandardMetricInputs
  ) -> Tuple[np.ndarray, float]:
    """Returns gains and example_weight sorted by prediction.

    Args:
      element: Standard metric inputs for one ranked list.

    Returns:
      A tuple (gains, example_weight) where gains is ordered from the highest
      to the lowest prediction. The weight is forced to 0.0 when no gain is
      positive, so such a list contributes nothing to the average.

    Raises:
      ValueError: If the number of gains differs from the number of
        predictions.
    """
    # Only the first yielded (label, prediction, weight) triple is used; the
    # label is not needed because relevance comes from the gain feature.
    _, predictions, example_weight = next(
        metric_util.to_label_prediction_example_weight(
            element,
            eval_config=self._eval_config,
            model_name=self._model_name,
            output_name=self._output_name,
            example_weighted=self._example_weighted,
            flatten=False,
            require_single_example_weight=True,
        )
    )  # pytype: disable=wrong-arg-types
    gains = util.get_by_keys(element.combined_features, [self._gain_key])
    if gains.size != predictions.size:
      raise ValueError(
          'expected {} to be same size as predictions {} != {}: '
          'gains={}, metric_keys={}, '
          'StandardMetricInputs={}'.format(
              self._gain_key,
              gains.size,
              predictions.size,
              gains,
              self._metric_keys,
              element,
          )
      )
    gains = gains.reshape(predictions.shape)
    # Ignore non-positive gains.
    if gains.max() <= 0:
      example_weight = 0.0
    # np.argsort is ascending, so reverse it to put the highest prediction
    # first. NOTE(review): assumes predictions is 1-D here -- confirm.
    return (gains[np.argsort(predictions)[::-1]], float(example_weight))

  def _calculate_dcg_at_k(self, k: int, sorted_values: List[float]) -> float:
    """Calculate the value of DCG@k.

    Args:
      k: The last position to consider. Values larger than the number of
        available gains are clamped to that number; non-positive values give
        a DCG of 0.
      sorted_values: A list of gain values assumed to be sorted in the desired
        ranking order.

    Returns:
      The value of DCG@k, i.e. sum_{i=1}^k gain_i / log_2(i + 1).
    """
    top_gains = np.asarray(sorted_values)[: max(k, 0)]
    # Size the discounts from the gains actually kept so that the two arrays
    # always line up, even when k exceeds len(sorted_values).
    discounts = np.log2(np.arange(2, len(top_gains) + 2))
    return np.sum(top_gains / discounts)

  def _calculate_ndcg(self, values: List[Tuple[int, float]], k: int) -> float:
    """Calculate NDCG@k, based on given rank and gain values.

    Args:
      values: A list of tuples representing rank order and gain values.
      k: The maximum position to consider in calculating nDCG

    Returns:
      The value of NDCG@k, for the given list of values. 0.0 is returned when
      the ideal DCG is not positive.
    """
    max_rank = min(k, len(values))
    # Gains in the order given by the ranks (ascending rank).
    ranked_values = [gain for _, gain in sorted(values, key=lambda x: x[0])]
    # Gains in the best possible order (descending gain).
    optimal_values = [
        gain for _, gain in sorted(values, key=lambda x: x[1], reverse=True)
    ]
    dcg = self._calculate_dcg_at_k(max_rank, ranked_values)
    optimal_dcg = self._calculate_dcg_at_k(max_rank, optimal_values)
    if optimal_dcg > 0:
      return dcg / optimal_dcg
    return 0.0

  def create_accumulator(self):
    """Returns a zeroed accumulator with one slot per metric key."""
    return _NDCGAccumulator(len(self._metric_keys))

  def add_input(
      self,
      accumulator: _NDCGAccumulator,
      element: metric_types.StandardMetricInputs,
  ) -> _NDCGAccumulator:
    """Adds the weighted NDCG@k of one ranked list to the accumulator.

    Raises:
      ValueError: If a metric key has no sub_key or no top_k value.
    """
    gains, example_weight = self._to_gains_example_weight(element)
    # Ranks are 1-based positions in the prediction-sorted gains.
    rank_gain = list(enumerate(gains, start=1))
    for i, key in enumerate(self._metric_keys):
      if not key.sub_key or key.sub_key.top_k is None:
        raise ValueError(
            'top_k values are required to use NDCG metric: {}'.format(key)
        )
      accumulator.ndcg[i] += (
          self._calculate_ndcg(rank_gain, key.sub_key.top_k) * example_weight
      )
    # example_weight is already a float (see _to_gains_example_weight).
    accumulator.total_weighted_examples += example_weight
    return accumulator

  def merge_accumulators(
      self, accumulators: Iterable[_NDCGAccumulator]
  ) -> _NDCGAccumulator:
    """Merges accumulators by summing them into the first one.

    The first accumulator is updated in place and returned. An empty iterable
    yields a fresh zeroed accumulator instead of leaking StopIteration.
    """
    accumulators = iter(accumulators)
    result = next(accumulators, None)
    if result is None:
      return self.create_accumulator()
    for accumulator in accumulators:
      result.ndcg = [a + b for a, b in zip(result.ndcg, accumulator.ndcg)]
      result.total_weighted_examples += accumulator.total_weighted_examples
    return result

  def extract_output(
      self, accumulator: _NDCGAccumulator
  ) -> Dict[metric_types.MetricKey, float]:
    """Returns the weighted mean NDCG per metric key.

    Every key maps to NaN when the total example weight is not positive.
    """
    total = accumulator.total_weighted_examples
    if total > 0:
      return {
          key: accumulator.ndcg[i] / total
          for i, key in enumerate(self._metric_keys)
      }
    return {key: float('nan') for key in self._metric_keys}