@@ -4821,20 +4821,33 @@ def test_to_gbq_table_labels(scalars_df_index):
4821
4821
pytest .param (["A" , "C" ], True , id = "two_arrays_true" ),
4822
4822
],
4823
4823
)
4824
def test_dataframe_explode(col_names, ignore_index, session):
    """Compare ``DataFrame.explode`` against pandas and bound its query count.

    The exploded frame must equal what pandas produces for the same
    ``col_names`` / ``ignore_index`` arguments (index type and dtypes are
    not compared), and materializing it with ``to_pandas()`` may increase
    the session's execution counter by at most one.
    """
    data = {
        "A": [[0, 1, 2], [], [3, 4]],
        "B": 3,
        "C": [["a", "b", "c"], np.nan, ["d", "e"]],
    }

    execution_metrics = session._metrics
    bf_df = bpd.DataFrame(data, session=session)
    local_df = bf_df.to_pandas()

    # Build the pandas reference and the explode result under test before
    # sampling the counter, so only the final to_pandas() call is measured.
    expected = local_df.explode(col_names, ignore_index=ignore_index)
    exploded = bf_df.explode(col_names, ignore_index=ignore_index)

    # Materializing the result should take at most a single query execution.
    count_before = execution_metrics.execution_count
    actual = exploded.to_pandas()
    count_after = execution_metrics.execution_count

    pd.testing.assert_frame_equal(
        actual,
        expected,
        check_index_type=False,
        check_dtype=False,
    )

    # This property is checked on explode specifically because its
    # compilation is non-deterministic, so it won't use the query cache as
    # implemented.
    executions_used = count_after - count_before
    assert executions_used <= 1
4838
4851
4839
4852
4840
4853
@pytest .mark .parametrize (
0 commit comments