Skip to content

Commit d2d425a

Browse files
authored
feat: read_gbq_table supports LIKE as an operator in filters (#454)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 330149095 🦕
1 parent 718a00c commit d2d425a

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

bigframes/session/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ def _to_query(
318318
valid_operators: Mapping[third_party_pandas_gbq.FilterOps, str] = {
319319
"in": "IN",
320320
"not in": "NOT IN",
321+
"LIKE": "LIKE",
321322
"==": "=",
322323
">": ">",
323324
"<": "<",

tests/system/small/test_session.py

+12
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,18 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id):
327327
assert df3 is not None
328328

329329

330+
def test_read_gbq_table_clustered_with_filter(session: bigframes.Session):
331+
df = session.read_gbq_table(
332+
"bigquery-public-data.cloud_storage_geo_index.landsat_index",
333+
filters=[[("sensor_id", "LIKE", "OLI%")], [("sensor_id", "LIKE", "%TIRS")]], # type: ignore
334+
columns=["sensor_id"],
335+
)
336+
sensors = df.groupby(["sensor_id"]).agg("count").to_pandas(ordered=False)
337+
assert "OLI" in sensors.index
338+
assert "TIRS" in sensors.index
339+
assert "OLI_TIRS" in sensors.index
340+
341+
330342
def test_read_gbq_wildcard(session: bigframes.Session):
331343
df = session.read_gbq("bigquery-public-data.noaa_gsod.gsod193*")
332344
assert df.shape == (348485, 32)

third_party/bigframes_vendored/pandas/io/gbq.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from bigframes import constants
99

10-
FilterOps = Literal["in", "not in", "<", "<=", "==", "!=", ">=", ">"]
10+
FilterOps = Literal["in", "not in", "<", "<=", "==", "!=", ">=", ">", "LIKE"]
1111
FilterType = Tuple[str, FilterOps, Any]
1212
FiltersType = Union[Iterable[FilterType], Iterable[Iterable[FilterType]]]
1313

@@ -112,7 +112,7 @@ def read_gbq(
112112
query results.
113113
filters (Union[Iterable[FilterType], Iterable[Iterable[FilterType]]], default ()): To
114114
filter out data. Filter syntax: [[(column, op, val), …],…] where
115-
op is [==, >, >=, <, <=, !=, in, not in]. The innermost tuples
115+
op is [==, >, >=, <, <=, !=, in, not in, LIKE]. The innermost tuples
116116
are transposed into a set of filters applied through an AND
117117
operation. The outer Iterable combines these sets of filters
118118
through an OR operation. A single Iterable of tuples can also

0 commit comments

Comments
 (0)