Skip to content

Commit 416d7cb

Browse files
perf: add ability to cache dataframe and series to session table (#51)
1 parent f9a93ce commit 416d7cb

File tree

5 files changed

+48
-0
lines changed

5 files changed

+48
-0
lines changed

bigframes/core/__init__.py

+23
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,29 @@ def slice(
11451145
)
11461146
return sliced_expr if step > 0 else sliced_expr.reversed()
11471147

1148+
def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue:
1149+
"""Write the ArrayValue to a session table and create a new block object that references it."""
1150+
ibis_expr = self._to_ibis_expr(
1151+
ordering_mode="unordered", expose_hidden_cols=True
1152+
)
1153+
destination = self._session._ibis_to_session_table(
1154+
ibis_expr, cluster_cols=cluster_cols, api_name="cache"
1155+
)
1156+
table_expression = self._session.ibis_client.sql(
1157+
f"SELECT * FROM `_SESSION`.`{destination.table_id}`"
1158+
)
1159+
new_columns = [table_expression[column] for column in self.column_names]
1160+
new_hidden_columns = [
1161+
table_expression[column] for column in self._hidden_ordering_column_names
1162+
]
1163+
return ArrayValue(
1164+
self._session,
1165+
table_expression,
1166+
columns=new_columns,
1167+
hidden_ordering_columns=new_hidden_columns,
1168+
ordering=self._ordering,
1169+
)
1170+
11481171

11491172
class ArrayValueBuilder:
11501173
"""Mutable expression class.

bigframes/core/blocks.py

+9
Original file line numberDiff line numberDiff line change
@@ -1467,6 +1467,15 @@ def to_sql_query(
14671467
idx_labels,
14681468
)
14691469

1470+
def cached(self) -> Block:
1471+
"""Write the block to a session table and create a new block object that references it."""
1472+
return Block(
1473+
self.expr.cached(cluster_cols=self.index_columns),
1474+
index_columns=self.index_columns,
1475+
column_labels=self.column_labels,
1476+
index_labels=self.index_labels,
1477+
)
1478+
14701479
def _is_monotonic(
14711480
self, column_ids: typing.Union[str, Sequence[str]], increasing: bool
14721481
) -> bool:

bigframes/dataframe.py

+3
Original file line numberDiff line numberDiff line change
@@ -2480,3 +2480,6 @@ def _set_block(self, block: blocks.Block):
24802480

24812481
def _get_block(self) -> blocks.Block:
24822482
return self._block
2483+
2484+
def _cached(self) -> DataFrame:
2485+
return DataFrame(self._block.cached())

bigframes/series.py

+3
Original file line numberDiff line numberDiff line change
@@ -1389,6 +1389,9 @@ def _slice(
13891389
),
13901390
)
13911391

1392+
def _cached(self) -> Series:
1393+
return Series(self._block.cached())
1394+
13921395

13931396
def _is_list_like(obj: typing.Any) -> typing_extensions.TypeGuard[typing.Sequence]:
13941397
return pandas.api.types.is_list_like(obj)

tests/system/small/test_dataframe.py

+10
Original file line numberDiff line numberDiff line change
@@ -2717,3 +2717,13 @@ def test_query_job_setters(scalars_df_default_index: dataframe.DataFrame):
27172717
job_ids.add(scalars_df_default_index.query_job.job_id)
27182718

27192719
assert len(job_ids) == 2
2720+
2721+
2722+
def test_df_cached(scalars_df_index):
2723+
df = scalars_df_index.set_index(["int64_too", "int64_col"]).sort_values(
2724+
"string_col"
2725+
)
2726+
df = df[df["rowindex_2"] % 2 == 0]
2727+
2728+
df_cached_copy = df._cached()
2729+
pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())

0 commit comments

Comments
 (0)