Skip to content

Commit 208a984

Browse files
fix: Fix __repr__ caching with partial ordering (#1016)
1 parent a95493d commit 208a984

File tree

5 files changed

+28
-10
lines changed

5 files changed

+28
-10
lines changed

bigframes/core/tree_properties.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ def can_fast_head(node: nodes.BigFrameNode) -> bool:
4444
"""Can get head fast if the head operator can be pushed down to the leaves and the operators preserve rows."""
4545
if isinstance(node, nodes.LeafNode):
4646
return node.supports_fast_head
47-
if isinstance(node, nodes.UnaryNode):
48-
return node.row_preserving and can_fast_head(node.child)
47+
if isinstance(node, (nodes.ProjectionNode, nodes.SelectionNode)):
48+
return can_fast_head(node.child)
4949
return False
5050

5151

bigframes/dataframe.py

-1
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,6 @@ def __repr__(self) -> str:
643643
if opts.repr_mode == "deferred":
644644
return formatter.repr_query_job(self._compute_dry_run())
645645

646-
self._cached()
647646
# TODO(swast): pass max_columns and get the true column count back. Maybe
648647
# get 1 more column than we have requested so that pandas can add the
649648
# ... for us?

bigframes/session/executor.py

-5
Original file line numberDiff line numberDiff line change
@@ -360,11 +360,6 @@ def _cache_with_cluster_cols(
360360

361361
def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue):
362362
"""Executes the query and uses the resulting table to rewrite future executions."""
363-
364-
if not self.strictly_ordered:
365-
raise ValueError(
366-
"Caching with offsets only supported in strictly ordered mode."
367-
)
368363
offset_column = bigframes.core.guid.generate_guid("bigframes_offsets")
369364
w_offsets, offset_column = array_value.promote_offsets()
370365
sql = self.compiler.compile_unordered(self._get_optimized_plan(w_offsets.node))

tests/system/conftest.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ def session_load() -> Generator[bigframes.Session, None, None]:
154154
session.close() # close generated session at cleanup time
155155

156156

157-
@pytest.fixture(scope="session", params=["ordered", "unordered"])
157+
@pytest.fixture(scope="session", params=["strict", "partial"])
158158
def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
159-
context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
159+
context = bigframes.BigQueryOptions(location="US", ordering_mode=request.param)
160160
session = bigframes.Session(context=context)
161161
yield session
162162
session.close() # close generated session at cleanup time

tests/system/small/test_dataframe.py

+24
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,30 @@ def test_repr_w_all_rows(scalars_dfs):
567567
assert actual == expected
568568

569569

570+
def test_join_repr(scalars_dfs_maybe_ordered):
571+
scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
572+
573+
scalars_df = (
574+
scalars_df[["int64_col"]]
575+
.join(scalars_df.set_index("int64_col")[["int64_too"]])
576+
.sort_index()
577+
)
578+
scalars_pandas_df = (
579+
scalars_pandas_df[["int64_col"]]
580+
.join(scalars_pandas_df.set_index("int64_col")[["int64_too"]])
581+
.sort_index()
582+
)
583+
# Pandas join result index name seems to depend on the index values in a way that bigframes can't match exactly
584+
scalars_pandas_df.index.name = None
585+
586+
actual = repr(scalars_df)
587+
588+
with display_options.pandas_repr(bigframes.options.display):
589+
expected = repr(scalars_pandas_df)
590+
591+
assert actual == expected
592+
593+
570594
def test_repr_html_w_all_rows(scalars_dfs):
571595
scalars_df, _ = scalars_dfs
572596
# get a pandas df of the expected format

0 commit comments

Comments
 (0)