Skip to content

Commit f8821fe

Browse files
feat: read_pandas accepts pandas Series and Index objects (#573)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent 8d39187 commit f8821fe

File tree

5 files changed

+72
-6
lines changed

5 files changed

+72
-6
lines changed

.pre-commit-config.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,4 +38,4 @@ repos:
3838
rev: v1.1.1
3939
hooks:
4040
- id: mypy
41-
additional_dependencies: [types-requests, types-tabulate]
41+
additional_dependencies: [types-requests, types-tabulate, pandas-stubs]

bigframes/pandas/__init__.py

+15
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,22 @@ def read_gbq_table(
577577
read_gbq_table.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_table)
578578

579579

580+
@typing.overload
580581
def read_pandas(pandas_dataframe: pandas.DataFrame) -> bigframes.dataframe.DataFrame:
582+
...
583+
584+
585+
@typing.overload
586+
def read_pandas(pandas_dataframe: pandas.Series) -> bigframes.series.Series:
587+
...
588+
589+
590+
@typing.overload
591+
def read_pandas(pandas_dataframe: pandas.Index) -> bigframes.core.indexes.Index:
592+
...
593+
594+
595+
def read_pandas(pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]):
581596
return global_session.with_default_session(
582597
bigframes.session.Session.read_pandas,
583598
pandas_dataframe,

bigframes/series.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1514,7 +1514,7 @@ def map(
15141514
map_df = map_df.rename(columns={arg.name: self.name})
15151515
elif isinstance(arg, Mapping):
15161516
map_df = bigframes.dataframe.DataFrame(
1517-
{"keys": list(arg.keys()), self.name: list(arg.values())},
1517+
{"keys": list(arg.keys()), self.name: list(arg.values())}, # type: ignore
15181518
session=self._get_block().expr.session,
15191519
)
15201520
map_df = map_df.set_index("keys")

bigframes/session/__init__.py

+40-4
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,9 @@
9595

9696
# Avoid circular imports.
9797
if typing.TYPE_CHECKING:
98+
import bigframes.core.indexes
9899
import bigframes.dataframe as dataframe
100+
import bigframes.series
99101

100102
_BIGFRAMES_DEFAULT_CONNECTION_ID = "bigframes-default-connection"
101103

@@ -963,7 +965,23 @@ def read_gbq_model(self, model_name: str):
963965
model = self.bqclient.get_model(model_ref)
964966
return bigframes.ml.loader.from_bq(self, model)
965967

968+
@typing.overload
969+
def read_pandas(
970+
self, pandas_dataframe: pandas.Index
971+
) -> bigframes.core.indexes.Index:
972+
...
973+
974+
@typing.overload
975+
def read_pandas(self, pandas_dataframe: pandas.Series) -> bigframes.series.Series:
976+
...
977+
978+
@typing.overload
966979
def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame:
980+
...
981+
982+
def read_pandas(
983+
self, pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]
984+
):
967985
"""Loads DataFrame from a pandas DataFrame.
968986
969987
The pandas DataFrame will be persisted as a temporary BigQuery table, which can be
@@ -986,13 +1004,31 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame
9861004
[2 rows x 2 columns]
9871005
9881006
Args:
989-
pandas_dataframe (pandas.DataFrame):
990-
a pandas DataFrame object to be loaded.
1007+
pandas_dataframe (pandas.DataFrame, pandas.Series, or pandas.Index):
1008+
a pandas DataFrame/Series/Index object to be loaded.
9911009
9921010
Returns:
993-
bigframes.dataframe.DataFrame: The BigQuery DataFrame.
1011+
An equivalent bigframes.pandas.(DataFrame/Series/Index) object
9941012
"""
995-
return self._read_pandas(pandas_dataframe, "read_pandas")
1013+
import bigframes.series as series
1014+
1015+
# Try to handle non-dataframe pandas objects as well
1016+
if isinstance(pandas_dataframe, pandas.Series):
1017+
bf_df = self._read_pandas(pandas.DataFrame(pandas_dataframe), "read_pandas")
1018+
bf_series = typing.cast(series.Series, bf_df[bf_df.columns[0]])
1019+
# wrapping into df can set name to 0 so reset to original object name
1020+
bf_series.name = pandas_dataframe.name
1021+
return bf_series
1022+
if isinstance(pandas_dataframe, pandas.Index):
1023+
return self._read_pandas(
1024+
pandas.DataFrame(index=pandas_dataframe), "read_pandas"
1025+
).index
1026+
if isinstance(pandas_dataframe, pandas.DataFrame):
1027+
return self._read_pandas(pandas_dataframe, "read_pandas")
1028+
else:
1029+
raise ValueError(
1030+
f"read_pandas() expects a pandas.DataFrame, but got a {type(pandas_dataframe)}"
1031+
)
9961032

9971033
def _read_pandas(
9981034
self, pandas_dataframe: pandas.DataFrame, api_name: str

tests/system/small/test_session.py

+15
Original file line number | Diff line number | Diff line change
@@ -421,6 +421,21 @@ def test_read_pandas(session, scalars_dfs):
421421
pd.testing.assert_frame_equal(result, expected)
422422

423423

424+
def test_read_pandas_series(session):
425+
idx = pd.Index([2, 7, 1, 2, 8], dtype=pd.Int64Dtype())
426+
pd_series = pd.Series([3, 1, 4, 1, 5], dtype=pd.Int64Dtype(), index=idx)
427+
bf_series = session.read_pandas(pd_series)
428+
429+
pd.testing.assert_series_equal(bf_series.to_pandas(), pd_series)
430+
431+
432+
def test_read_pandas_index(session):
433+
pd_idx = pd.Index([2, 7, 1, 2, 8], dtype=pd.Int64Dtype())
434+
bf_idx = session.read_pandas(pd_idx)
435+
436+
pd.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx)
437+
438+
424439
def test_read_pandas_inline_respects_location():
425440
options = bigframes.BigQueryOptions(location="europe-west1")
426441
session = bigframes.Session(options)

0 commit comments

Comments
 (0)