fix: Ensure no double execution for to_pandas (#1032)

TrevorBergeron · web-flow · commit 4992cc27e46b · 2024-10-01T12:12:06.000-05:00
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -636,7 +636,7 @@ def _materialize_local(
             )
         else:
             total_rows = execute_result.total_rows
-            arrow = self.session._executor.execute(self.expr).to_arrow_table()
+            arrow = execute_result.to_arrow_table()
             df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
             self._copy_index_to_pandas(df)
 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -4821,20 +4821,33 @@ def test_to_gbq_table_labels(scalars_df_index):
         pytest.param(["A", "C"], True, id="two_arrays_true"),
     ],
 )
-def test_dataframe_explode(col_names, ignore_index):
+def test_dataframe_explode(col_names, ignore_index, session):
     data = {
         "A": [[0, 1, 2], [], [3, 4]],
         "B": 3,
         "C": [["a", "b", "c"], np.nan, ["d", "e"]],
     }
-    df = bpd.DataFrame(data)
+
+    metrics = session._metrics
+    df = bpd.DataFrame(data, session=session)
     pd_df = df.to_pandas()
+    pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
+    bf_result = df.explode(col_names, ignore_index=ignore_index)
+
+    # Check that to_pandas() results in at most a single query execution
+    execs_pre = metrics.execution_count
+    bf_materialized = bf_result.to_pandas()
+    execs_post = metrics.execution_count
+
     pd.testing.assert_frame_equal(
-        df.explode(col_names, ignore_index=ignore_index).to_pandas(),
-        pd_df.explode(col_names, ignore_index=ignore_index),
+        bf_materialized,
+        pd_result,
         check_index_type=False,
         check_dtype=False,
     )
+    # We test this property on this method in particular because compilation
+    # is non-deterministic and won't use the query cache as implemented.
+    assert execs_post - execs_pre <= 1
 
 
 @pytest.mark.parametrize(

Original file line number	Diff line number	Diff line change
`@@ -636,7 +636,7 @@ def _materialize_local(`
`636`	`636`	`)`
`637`	`637`	`else:`
`638`	`638`	`total_rows = execute_result.total_rows`
`639`		`- arrow = self.session._executor.execute(self.expr).to_arrow_table()`
	`639`	`+ arrow = execute_result.to_arrow_table()`
`640`	`640`	`df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)`
`641`	`641`	`self._copy_index_to_pandas(df)`
`642`	`642`