Skip to content

Commit 4992cc2

Browse files
fix: Ensure no double execution for to_pandas (#1032)
1 parent a5ad033 commit 4992cc2

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

bigframes/core/blocks.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -636,7 +636,7 @@ def _materialize_local(
636636
)
637637
else:
638638
total_rows = execute_result.total_rows
639-
arrow = self.session._executor.execute(self.expr).to_arrow_table()
639+
arrow = execute_result.to_arrow_table()
640640
df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
641641
self._copy_index_to_pandas(df)
642642

tests/system/small/test_dataframe.py

+17 -4
Original file line number | Diff line number | Diff line change
@@ -4821,20 +4821,33 @@ def test_to_gbq_table_labels(scalars_df_index):
48214821
pytest.param(["A", "C"], True, id="two_arrays_true"),
48224822
],
48234823
)
4824-
def test_dataframe_explode(col_names, ignore_index):
4824+
def test_dataframe_explode(col_names, ignore_index, session):
48254825
data = {
48264826
"A": [[0, 1, 2], [], [3, 4]],
48274827
"B": 3,
48284828
"C": [["a", "b", "c"], np.nan, ["d", "e"]],
48294829
}
4830-
df = bpd.DataFrame(data)
4830+
4831+
metrics = session._metrics
4832+
df = bpd.DataFrame(data, session=session)
48314833
pd_df = df.to_pandas()
4834+
pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
4835+
bf_result = df.explode(col_names, ignore_index=ignore_index)
4836+
4837+
# Check that to_pandas() results in at most a single query execution
4838+
execs_pre = metrics.execution_count
4839+
bf_materialized = bf_result.to_pandas()
4840+
execs_post = metrics.execution_count
4841+
48324842
pd.testing.assert_frame_equal(
4833-
df.explode(col_names, ignore_index=ignore_index).to_pandas(),
4834-
pd_df.explode(col_names, ignore_index=ignore_index),
4843+
bf_materialized,
4844+
pd_result,
48354845
check_index_type=False,
48364846
check_dtype=False,
48374847
)
4848+
# we test this property on this method in particular as compilation
4849+
# is non-deterministic and won't use the query cache as implemented
4850+
assert execs_post - execs_pre <= 1
48384851

48394852

48404853
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)