Skip to content

Commit 8bbab38

Browse files
paulgc17 authored and
tfx-copybara committed
no-op
PiperOrigin-RevId: 430987175
1 parent c8eb75c commit 8bbab38

File tree

6 files changed

+2
-13
lines changed

6 files changed

+2
-13
lines changed

tensorflow_data_validation/api/stats_api.py

Lines changed: 0 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -104,8 +104,6 @@ def expand(
104104
def _sample_at_rate(example: pa.RecordBatch, sample_rate: float
105105
) -> Generator[pa.RecordBatch, None, None]:
106106
"""Sample examples at input sampling rate."""
107-
# TODO(pachristopher): Revisit this to decide if we need to fix a seed
108-
# or add an optional seed argument.
109107
if random.random() <= sample_rate:
110108
yield example
111109

tensorflow_data_validation/api/validation_api.py

Lines changed: 0 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -129,7 +129,6 @@ def _may_be_set_legacy_flag(schema: schema_pb2.Schema):
129129
schema.generate_legacy_feature_spec = False
130130

131131

132-
# TODO(pachristopher): Add support for updating only a subset of features.
133132
def update_schema(schema: schema_pb2.Schema,
134133
statistics: statistics_pb2.DatasetFeatureStatisticsList,
135134
infer_feature_shape: Optional[bool] = True,

tensorflow_data_validation/statistics/generators/mutual_information.py

Lines changed: 0 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -529,8 +529,6 @@ def compute(
529529
result[feature_path] = {self._custom_stats_key: 0.0}
530530
return stats_util.make_dataset_feature_stats_proto(result)
531531

532-
# TODO(pachristopher): Currently encoded examples operate on lists. Consider
533-
# using ndarrays and vectorizing the operations.
534532
encoded_examples = _encode_examples(examples_record_batch,
535533
self._multivalent_features,
536534
self._categorical_features,

tensorflow_data_validation/statistics/stats_impl.py

Lines changed: 0 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -83,8 +83,6 @@ def expand(
8383
slicing_util.GenerateSlicesSqlDoFn(
8484
slice_sqls=self._options.experimental_slice_sqls)))
8585
else:
86-
# TODO(pachristopher): Remove this special case if this doesn't give any
87-
# performance improvement.
8886
dataset = (dataset
8987
| 'KeyWithVoid' >> beam.Map(lambda v: (None, v)))
9088
_ = dataset | 'TrackDistinctSliceKeys' >> _TrackDistinctSliceKeys() # pylint: disable=no-value-for-parameter
@@ -551,9 +549,6 @@ def __init__(
551549
else:
552550
self._desired_batch_size = constants.DEFAULT_DESIRED_INPUT_BATCH_SIZE
553551

554-
# TODO(pachristopher): Understand the cost of incrementing beam counters
555-
# for every input batch. The other option is to update the counters during
556-
# teardown.
557552
# Metrics
558553
self._combine_batch_size = beam.metrics.Metrics.distribution(
559554
constants.METRICS_NAMESPACE, 'combine_batch_size')

tensorflow_data_validation/utils/batch_util.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -28,7 +28,7 @@
2828
from tfx_bsl.coders import batch_util
2929

3030

31-
# TODO(pachristopher): Deprecate this.
31+
# TODO(b/221152546): Deprecate this.
3232
@beam.ptransform_fn
3333
def BatchExamplesToArrowRecordBatches(
3434
examples: beam.PCollection[types.Example],

tensorflow_data_validation/utils/stats_gen_lib.py

Lines changed: 1 addition & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -225,8 +225,7 @@ def generate_statistics_from_dataframe(
225225
merged_partial_stats = _generate_partial_statistics_from_df(
226226
dataframe, stats_options, stats_generators)
227227
else:
228-
# TODO(pachristopher): Investigate why we don't observe linear speedup after
229-
# a certain number of processes.
228+
# TODO(b/144580609): Consider using Beam for inmemory mode as well.
230229
splits = np.array_split(dataframe, n_jobs)
231230
partial_stats = Parallel(n_jobs=n_jobs)(
232231
delayed(_generate_partial_statistics_from_df)(

0 commit comments

Comments (0)