Closed
Description
Deadline exceeded for a large mutation of a Cloud Bigtable table. This is not necessarily a bug, but it might be helpful to others to add this to the docs, automatically batch mutations by size, make the timeout configurable, or document large data transfers as a potential cause of exceeding the deadline.
Environment details
- Cloud BigTable
- Ubuntu 18.04
- Python 3.6.4
- google-cloud-bigtable==0.31.1
Steps to reproduce and code example
Change the value of mutation_batch_size
in the following code: if it is too large, the deadline is exceeded (observed with serialized tf.Example video examples, each containing 4 frames of size 224x224). This is not necessarily a bug if it is the expected behavior and users are expected to handle this kind of batching themselves.
def iterable_dataset_from_file(filename):
    """Generate the elements of a TFRecord file one at a time.

    Builds a ``tf.data.TFRecordDataset`` over ``filename``, opens a
    ``tf.Session`` and evaluates the dataset's "next element" op repeatedly
    until TensorFlow signals exhaustion with ``tf.errors.OutOfRangeError``.
    A progress line is printed before every 1000th fetch (including the very
    first one) and a final line is printed once the file is exhausted.

    Args:
        filename: Value handed straight to ``tf.data.TFRecordDataset``.

    Yields:
        The result of ``session.run`` on the iterator's next-element op, once
        per record (presumably the serialized record bytes -- confirm against
        the TFRecordDataset documentation).
    """
    record_iterator = tf.data.TFRecordDataset(
        filename).make_initializable_iterator()
    fetch_next = record_iterator.get_next()

    with tf.Session() as session:
        session.run(record_iterator.initializer)

        num_yielded = 0
        exhausted = False
        while not exhausted:
            # Progress report is emitted before attempting the fetch, so it
            # also fires right before the exhaustion message when the record
            # count is an exact multiple of 1000.
            if not num_yielded % 1000:
                print("Processed %s examples..." % num_yielded)
            try:
                yield session.run(fetch_next)
                num_yielded += 1
            except tf.errors.OutOfRangeError:
                # Raised by TensorFlow when the iterator has no more elements.
                print("Ran out of examples (processed %s), exiting..."
                      % num_yielded)
                exhausted = True
def tfrecord_files_to_cbt_table(glob, table, selection, max_records=100000000,
                                mutation_batch_size=250):
    """Write every record of the TFRecord files matching ``glob`` to ``table``.

    Each record becomes one row holding a single cell. Rows are buffered and
    sent with ``table.mutate_rows`` in batches of ``mutation_batch_size``;
    whatever is left in the buffer after the last file is sent as a final,
    smaller batch. Batches may span file boundaries, so no row is dropped
    when a file's record count is not a multiple of the batch size.

    Keep ``mutation_batch_size`` small when individual records are large:
    oversized batches can make the underlying request exceed its deadline.

    Args:
        glob: File pattern expanded with ``tf.gfile.Glob``.
        table: Object providing ``row(key)`` and ``mutate_rows(rows)``.
        selection: Object providing ``prefix``, ``column_family`` and
            ``column_qualifier`` attributes used to build the row key and
            address the cell.
        max_records: Accepted for interface compatibility but not enforced.
            NOTE(review): the original implementation never read this
            argument either; confirm the intended semantics (per file vs.
            total) before adding a cap.
        mutation_batch_size: Number of rows sent per ``mutate_rows`` call.
            Must be at least 1.

    Raises:
        ValueError: If ``mutation_batch_size`` is smaller than 1.
    """
    if mutation_batch_size < 1:
        raise ValueError(
            "mutation_batch_size must be >= 1, got %r" % (mutation_batch_size,))

    pending_rows = []
    for file_path in tf.gfile.Glob(glob):
        for i, example in enumerate(iterable_dataset_from_file(file_path)):
            digest = hashlib.md5(example).hexdigest()
            # DEV: To check "shuffle" effect add the id suffix
            row_key = "_".join([selection.prefix, digest, str(i)])

            row = table.row(row_key)
            row.set_cell(column_family_id=selection.column_family,
                         column=selection.column_qualifier,
                         value=example,
                         timestamp=datetime.datetime.utcnow())
            pending_rows.append(row)

            if len(pending_rows) >= mutation_batch_size:
                # NOTE(review): the return value of mutate_rows is ignored,
                # as in the original; per-row failures would go unnoticed if
                # the client reports them that way -- confirm against the
                # client library documentation.
                table.mutate_rows(pending_rows)
                pending_rows = []

    # Flush the trailing partial batch. The buffer only ever contains real
    # rows, so there is no off-by-one slice and no placeholder entries.
    if pending_rows:
        table.mutate_rows(pending_rows)
Stack trace
Traceback (most recent call last):
File "/home/jovyan/.local/lib/python3.6/site-packages/google/api_core/grpc_helpers.py", line 79, in next
return six.next(self._wrapped)
File "/opt/conda/lib/python3.6/site-packages/grpc/_channel.py", line 341, in __next__
return self._next()
File "/opt/conda/lib/python3.6/site-packages/grpc/_channel.py", line 335, in _next
raise self
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with (StatusCode.DEADLINE_EXCEEDED, Deadline Exceeded)>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/jovyan/work/pcml/pcml/operations/tfrecord2bigtable.py", line 330, in <module>
tf.app.run()
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "/home/jovyan/work/pcml/pcml/operations/tfrecord2bigtable.py", line 300, in main
max_records=FLAGS.max_records)
File "/home/jovyan/work/pcml/pcml/operations/tfrecord2bigtable.py", line 228, in tfrecord_files_to_cbt_table
table.mutate_rows(row_mutation_batch)
File "/home/jovyan/.local/lib/python3.6/site-packages/google/cloud/bigtable/table.py", line 423, in mutate_rows
return retryable_mutate_rows(retry=retry)
File "/home/jovyan/.local/lib/python3.6/site-packages/google/cloud/bigtable/table.py", line 571, in __call__
mutate_rows()
File "/home/jovyan/.local/lib/python3.6/site-packages/google/api_core/retry.py", line 270, in retry_wrapped_func
on_error=on_error,
File "/home/jovyan/.local/lib/python3.6/site-packages/google/api_core/retry.py", line 179, in retry_target
return target()
File "/home/jovyan/.local/lib/python3.6/site-packages/google/cloud/bigtable/table.py", line 634, in _do_mutate_retryable_rows
for response in responses:
File "/home/jovyan/.local/lib/python3.6/site-packages/google/api_core/grpc_helpers.py", line 81, in next
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.DeadlineExceeded: 504 Deadline Exceeded