Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,9 @@ def _to_sql_query(
@property
def sql(self) -> str:
    """Compiles this DataFrame's expression tree to SQL.

    The index column(s) are included in the generated query only when the
    frame actually has an index and that index is either named or has more
    than one level.

    Returns:
        str: The SQL text produced by ``_to_sql_query``.
    """
    # Check ``_has_index`` first so ``self.index`` is never touched on a
    # frame without an index; the short-circuit makes the guard effective.
    include_index = self._has_index and (
        self.index.name is not None or len(self.index.names) > 1
    )
    # ``_to_sql_query`` returns a 3-tuple; only the SQL string is needed here.
    sql, _, _ = self._to_sql_query(include_index=include_index)
    return sql

Expand Down
2 changes: 2 additions & 0 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,8 @@ def _read_gbq_table(
table=table,
index_cols=index_cols,
api_name=api_name,
# If not in strict ordering mode, skip the overhead of scanning the index column(s) to determine whether they are unique.
metadata_only=not self._strictly_ordered,
)
schema = schemata.ArraySchema.from_bq_table(table)
if columns:
Expand Down
4 changes: 4 additions & 0 deletions bigframes/session/_io/bigquery/read_gbq_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def are_index_cols_unique(
table: bigquery.table.Table,
index_cols: List[str],
api_name: str,
metadata_only: bool = False,
) -> bool:
if len(index_cols) == 0:
return False
Expand All @@ -161,6 +162,9 @@ def are_index_cols_unique(
if (len(primary_keys) > 0) and primary_keys <= frozenset(index_cols):
return True

if metadata_only:
# Sometimes not worth scanning data to check uniqueness
return False
# TODO(b/337925142): Avoid a "SELECT *" subquery here by ensuring
# table_expression only selects just index_cols.
is_unique_sql = bigframes.core.sql.is_distinct_sql(index_cols, table.reference)
Expand Down
9 changes: 9 additions & 0 deletions tests/system/small/test_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@
from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas


def test_unordered_mode_sql_no_hash(unordered_session):
    """Check that SQL compiled for a table read in an unordered session
    contains neither an ``ORDER BY`` clause nor a ``farm_fingerprint`` call.
    """
    table_id = "bigquery-public-data.ethereum_blockchain.blocks"
    # Case-fold once so every comparison below is case-insensitive.
    compiled_sql = unordered_session.read_gbq(table_id).sql.casefold()
    for forbidden in ("ORDER BY", "farm_fingerprint"):
        assert forbidden.casefold() not in compiled_sql


def test_unordered_mode_job_label(unordered_session):
pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
df = bpd.DataFrame(pd_df, session=unordered_session)
Expand Down