Skip to content

Commit d204603

Browse files
perf: repr generates fewer queries (#1046)
1 parent ffb406a commit d204603

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

bigframes/core/blocks.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1557,10 +1557,11 @@ def retrieve_repr_request_results(
15571557
Returns a tuple of the dataframe and the overall number of rows of the query.
15581558
"""
15591559

1560+
# head caches full underlying expression, so row_count will be free after
15601561
head_result = self.session._executor.head(self.expr, max_results)
15611562
count = self.session._executor.get_row_count(self.expr)
15621563

1563-
arrow = self.session._executor.execute(self.expr).to_arrow_table()
1564+
arrow = head_result.to_arrow_table()
15641565
df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
15651566
self._copy_index_to_pandas(df)
15661567
return df, count, head_result.query_job

bigframes/dataframe.py

-1
Original file line number | Diff line number | Diff line change
@@ -690,7 +690,6 @@ def _repr_html_(self) -> str:
690690
if opts.repr_mode == "deferred":
691691
return formatter.repr_query_job(self._compute_dry_run())
692692

693-
self._cached()
694693
# TODO(swast): pass max_columns and get the true column count back. Maybe
695694
# get 1 more column than we have requested so that pandas can add the
696695
# ... for us?

tests/system/small/test_dataframe.py

+6 -1
Original file line number | Diff line number | Diff line change
@@ -591,15 +591,19 @@ def test_join_repr(scalars_dfs_maybe_ordered):
591591
assert actual == expected
592592

593593

594-
def test_repr_html_w_all_rows(scalars_dfs):
594+
def test_repr_html_w_all_rows(scalars_dfs, session):
595+
metrics = session._metrics
595596
scalars_df, _ = scalars_dfs
596597
# get a pandas df of the expected format
597598
df, _ = scalars_df._block.to_pandas()
598599
pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
599600
pandas_df.index.name = scalars_df.index.name
600601

602+
executions_pre = metrics.execution_count
601603
# When there are 10 or fewer rows, the outputs should be identical except for the extra note.
602604
actual = scalars_df.head(10)._repr_html_()
605+
executions_post = metrics.execution_count
606+
603607
with display_options.pandas_repr(bigframes.options.display):
604608
pandas_repr = pandas_df.head(10)._repr_html_()
605609

@@ -608,6 +612,7 @@ def test_repr_html_w_all_rows(scalars_dfs):
608612
+ f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
609613
)
610614
assert actual == expected
615+
assert (executions_post - executions_pre) <= 2
611616

612617

613618
def test_df_column_name_with_space(scalars_dfs):

0 commit comments

Comments (0)