@@ -19,9 +19,11 @@
 
 import google.api_core.exceptions
 import google.cloud.bigquery
+import google.cloud.bigquery.table
 import pytest
 
 import bigframes
+import bigframes.exceptions
 
 from .. import resources
 
@@ -50,6 +52,43 @@ def test_read_gbq_cached_table():
     assert "1999-01-02T03:04:05.678901" in df.sql
 
 
+def test_read_gbq_clustered_table_ok_default_index_with_primary_key():
+    """If a primary key is set on the table, we use those columns as the index
+    by default; no error should be raised in this case.
+
+    See internal issue 335727141.
+    """
+    table = google.cloud.bigquery.Table("my-project.my_dataset.my_table")
+    table.clustering_fields = ["col_1", "col_2"]
+    table.schema = (
+        google.cloud.bigquery.SchemaField("pk_1", "INT64"),
+        google.cloud.bigquery.SchemaField("pk_2", "INT64"),
+        google.cloud.bigquery.SchemaField("col_1", "INT64"),
+        google.cloud.bigquery.SchemaField("col_2", "INT64"),
+    )
+
+    # TODO(b/305264153): use setter for table_constraints in client library
+    # when available.
+    table._properties["tableConstraints"] = {
+        "primaryKey": {
+            "columns": ["pk_1", "pk_2"],
+        },
+    }
+    bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True)
+    bqclient.project = "test-project"
+    bqclient.get_table.return_value = table
+    session = resources.create_bigquery_session(
+        bqclient=bqclient, table_schema=table.schema
+    )
+    table._properties["location"] = session._location
+
+    df = session.read_gbq("my-project.my_dataset.my_table")
+
+    # There should be no analytic operators, which would prevent row filtering
+    # pushdown.
+    assert "OVER" not in df.sql
+    assert tuple(df.index.names) == ("pk_1", "pk_2")
+
+
 @pytest.mark.parametrize(
     "not_found_table_id",
     [("unknown.dataset.table"), ("project.unknown.table"), ("project.dataset.unknown")],
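
For context, the behavior this test pins down is also observable through the public bigframes.pandas entry point: reading a table that declares a primary key should yield a DataFrame indexed by those key columns without passing index_col. A minimal sketch, assuming a real table with a declared two-column primary key (the table id below is a placeholder reusing the identifiers from the test):

import bigframes.pandas as bpd

# Placeholder table id; assumes the table declares PRIMARY KEY (pk_1, pk_2).
df = bpd.read_gbq("my-project.my_dataset.my_table")

# No index_col was passed, so the primary key columns are expected to become
# the default index, and the generated SQL should not need an OVER clause to
# synthesize a sequential index.
print(tuple(df.index.names))  # expected: ("pk_1", "pk_2")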