Skip to content

Commit 20e5c58

Browse files
sycaigcf-owl-bot[bot]tswast
authored
feat: Add warning when user tries to access struct series fields with __getitem__ (#1082)
* feat: add warning when accessing structs with wrong method * polishing words * move test from unit to system * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add more tests * polish tests * upgrade test setup * define a dedicated warning and set a stack level at warning site * fix format * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix doc * Apply suggestions from code review * update doc --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 9a6b313 commit 20e5c58

File tree

4 files changed

+134
-0
lines changed

4 files changed

+134
-0
lines changed

bigframes/core/indexers.py

+23
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import typing
1818
from typing import Tuple, Union
19+
import warnings
1920

2021
import bigframes_vendored.constants as constants
2122
import ibis
@@ -27,6 +28,8 @@
2728
import bigframes.core.indexes as indexes
2829
import bigframes.core.scalar
2930
import bigframes.dataframe
31+
import bigframes.dtypes
32+
import bigframes.exceptions
3033
import bigframes.operations as ops
3134
import bigframes.series
3235

@@ -370,6 +373,7 @@ def _perform_loc_list_join(
370373
# right join based on the old index so that the matching rows from the user's
371374
# original dataframe will be duplicated and reordered appropriately
372375
if isinstance(series_or_dataframe, bigframes.series.Series):
376+
_struct_accessor_check_and_warn(series_or_dataframe, keys_index)
373377
original_name = series_or_dataframe.name
374378
name = series_or_dataframe.name if series_or_dataframe.name is not None else "0"
375379
result = typing.cast(
@@ -391,6 +395,25 @@ def _perform_loc_list_join(
391395
return result
392396

393397

398+
def _struct_accessor_check_and_warn(
399+
series: bigframes.series.Series, index: indexes.Index
400+
):
401+
if not bigframes.dtypes.is_struct_like(series.dtype):
402+
# No need to check series that do not have struct values
403+
return
404+
405+
if not bigframes.dtypes.is_string_like(index.dtype):
406+
# No need to check indexing with non-string values.
407+
return
408+
409+
if not bigframes.dtypes.is_string_like(series.index.dtype):
410+
warnings.warn(
411+
"Are you trying to access struct fields? If so, please use Series.struct.field(...) method instead.",
412+
category=bigframes.exceptions.BadIndexerKeyWarning,
413+
stacklevel=7, # Stack depth from series.__getitem__ to here
414+
)
415+
416+
394417
@typing.overload
395418
def _iloc_getitem_series_or_dataframe(
396419
series_or_dataframe: bigframes.series.Series, key

bigframes/exceptions.py

+4
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,7 @@ class UnknownDataTypeWarning(Warning):
7373

7474
class ApiDeprecationWarning(FutureWarning):
7575
"""The API has been deprecated."""
76+
77+
78+
class BadIndexerKeyWarning(Warning):
79+
"""The indexer key is not used correctly."""

tests/system/small/core/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import warnings
16+
17+
import pyarrow as pa
18+
import pytest
19+
20+
import bigframes.exceptions
21+
import bigframes.pandas as bpd
22+
23+
24+
@pytest.fixture(scope="module")
25+
def string_indexed_struct_series(session):
26+
return bpd.Series(
27+
[
28+
{"project": "pandas", "version": 1},
29+
],
30+
dtype=bpd.ArrowDtype(
31+
pa.struct([("project", pa.string()), ("version", pa.int64())])
32+
),
33+
index=["a"],
34+
session=session,
35+
)
36+
37+
38+
@pytest.fixture(scope="module")
39+
def number_series(session):
40+
return bpd.Series(
41+
[0],
42+
dtype=bpd.Int64Dtype,
43+
session=session,
44+
)
45+
46+
47+
@pytest.fixture(scope="module")
48+
def string_indexed_number_series(session):
49+
return bpd.Series(
50+
[0],
51+
dtype=bpd.Int64Dtype,
52+
index=["a"],
53+
session=session,
54+
)
55+
56+
57+
def test_non_string_indexed_struct_series_with_string_key_should_warn(session):
58+
s = bpd.Series(
59+
[
60+
{"project": "pandas", "version": 1},
61+
],
62+
dtype=bpd.ArrowDtype(
63+
pa.struct([("project", pa.string()), ("version", pa.int64())])
64+
),
65+
session=session,
66+
)
67+
68+
with pytest.warns(bigframes.exceptions.BadIndexerKeyWarning):
69+
s["a"]
70+
71+
72+
@pytest.mark.parametrize(
73+
"series",
74+
[
75+
"string_indexed_struct_series",
76+
"number_series",
77+
"string_indexed_number_series",
78+
],
79+
)
80+
@pytest.mark.parametrize(
81+
"key",
82+
[
83+
0,
84+
"a",
85+
],
86+
)
87+
def test_struct_series_indexers_should_not_warn(request, series, key):
88+
s = request.getfixturevalue(series)
89+
90+
with warnings.catch_warnings():
91+
warnings.simplefilter(
92+
"error", category=bigframes.exceptions.BadIndexerKeyWarning
93+
)
94+
s[key]

0 commit comments

Comments
 (0)