Skip to content

Commit b94bae9

Browse files
authored
fix: loc setitem dtype issue. (#603)
* fix: loc setitem dtype issue. * Update NaN selection * Update code example
1 parent 86e0f38 commit b94bae9

File tree

3 files changed

+23
-7
lines changed

3 files changed

+23
-7
lines changed

bigframes/core/indexers.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,15 @@ def __setitem__(
192192
and isinstance(key[0], bigframes.series.Series)
193193
and key[0].dtype == "boolean"
194194
) and pd.api.types.is_scalar(value):
195-
new_column = key[0].map({True: value, False: None})
195+
# For an integer scalar, assigning the value to a new column makes the dtype default to float.
196+
# But assigning it to an existing Int64 column keeps the dtype as integer.
197+
# So we need to use a different missing-value marker (pd.NA for an existing column, None for a new one) to match this behavior.
198+
new_column = key[0].map(
199+
{
200+
True: value,
201+
False: pd.NA if key[1] in self._dataframe.columns else None,
202+
}
203+
)
196204
try:
197205
original_column = self._dataframe[key[1]]
198206
except KeyError:

tests/system/small/test_dataframe.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -2918,15 +2918,23 @@ def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs):
29182918
)
29192919

29202920

2921-
def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs):
2921+
@pytest.mark.parametrize(
2922+
("col", "value"),
2923+
[
2924+
("string_col", "hello"),
2925+
("int64_col", 3),
2926+
("float64_col", 3.5),
2927+
],
2928+
)
2929+
def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value):
29222930
if pd.__version__.startswith("1."):
29232931
pytest.skip("this loc overload not supported in pandas 1.x.")
29242932

29252933
scalars_df, scalars_pandas_df = scalars_dfs
29262934
bf_df = scalars_df.copy()
29272935
pd_df = scalars_pandas_df.copy()
2928-
bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = "hello"
2929-
pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = "hello"
2936+
bf_df.loc[bf_df["int64_too"] == 1, col] = value
2937+
pd_df.loc[pd_df["int64_too"] == 1, col] = value
29302938

29312939
pd.testing.assert_frame_equal(
29322940
bf_df.to_pandas(),

third_party/bigframes_vendored/pandas/core/generic.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -662,9 +662,9 @@ def copy(self):
662662
663663
>>> df.loc[df["b"] == 2, "b"] = 22
664664
>>> df
665-
a b
666-
0 1 22.0
667-
1 3 4.0
665+
a b
666+
0 1 22
667+
1 3 4
668668
<BLANKLINE>
669669
[2 rows x 2 columns]
670670
>>> df_copy

0 commit comments

Comments
 (0)