googleapis · TrevorBergeron · Jul 23, 2024 · Jul 16, 2024 · Jul 22, 2024 · Jul 23, 2024
@@ -385,7 +385,9 @@ def _to_sql_query(
     @property
     def sql(self) -> str:
         """Compiles this DataFrame's expression tree to SQL."""
-        include_index = self.index.name is not None or len(self.index.names) > 1
+        include_index = self._has_index and (
+            self.index.name is not None or len(self.index.names) > 1
+        )
         sql, _, _ = self._to_sql_query(include_index=include_index)
         return sql
 

@@ -889,6 +889,8 @@ def _read_gbq_table(
             table=table,
             index_cols=index_cols,
             api_name=api_name,
+            # If not in strict ordering mode, skip the overhead of scanning the index column(s) to determine whether they are unique
+            metadata_only=not self._strictly_ordered,
         )
         schema = schemata.ArraySchema.from_bq_table(table)
         if columns:

@@ -152,6 +152,7 @@ def are_index_cols_unique(
     table: bigquery.table.Table,
     index_cols: List[str],
     api_name: str,
+    metadata_only: bool = False,
 ) -> bool:
     if len(index_cols) == 0:
         return False
@@ -161,6 +162,9 @@ def are_index_cols_unique(
     if (len(primary_keys) > 0) and primary_keys <= frozenset(index_cols):
         return True
 
+    if metadata_only:
+        # Metadata-only check requested: skip the data scan and conservatively report the index columns as not unique
+        return False
     # TODO(b/337925142): Avoid a "SELECT *" subquery here by ensuring
     # table_expression only selects just index_cols.
     is_unique_sql = bigframes.core.sql.is_distinct_sql(index_cols, table.reference)

@@ -20,6 +20,15 @@
 from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
 
 
+def test_unordered_mode_sql_no_hash(unordered_session):
+    bf_df = unordered_session.read_gbq(
+        "bigquery-public-data.ethereum_blockchain.blocks"
+    )
+    sql = bf_df.sql
+    assert "ORDER BY".casefold() not in sql.casefold()
+    assert "farm_fingerprint".casefold() not in sql.casefold()
+
+
 def test_unordered_mode_job_label(unordered_session):
     pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
     df = bpd.DataFrame(pd_df, session=unordered_session)