Skip to content

Commit 25d049c

Browse files
authored
feat: merge only generates a default index if both inputs already have an index (#733)
* feat: `merge` only generates a default index if both inputs already have an index
* add tests for merge with null index
1 parent 56cbd3b commit 25d049c

File tree

7 files changed

+1723
-235
lines changed

7 files changed

+1723
-235
lines changed

bigframes/_config/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ def _init_bigquery_thread_local(self):
6161
@property
6262
def bigquery(self) -> bigquery_options.BigQueryOptions:
6363
"""Options to use with the BigQuery engine."""
64-
if self._local.bigquery_options is not None:
64+
if (
65+
bigquery_options := getattr(self._local, "bigquery_options", None)
66+
) is not None:
6567
# The only way we can get here is if someone called
6668
# _init_bigquery_thread_local.
67-
return self._local.bigquery_options
69+
return bigquery_options
6870

6971
return self._bigquery_options
7072

bigframes/core/blocks.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def __init__(
124124
if len(index_columns) == 0:
125125
warnings.warn(
126126
"Creating object with Null Index. Null Index is a preview feature.",
127-
category=bigframes.exceptions.PreviewWarning,
127+
category=bigframes.exceptions.NullIndexPreviewWarning,
128128
)
129129
self._index_columns = tuple(index_columns)
130130
# Index labels don't need complicated hierarchical access so can store as tuple
@@ -1930,10 +1930,22 @@ def merge(
19301930
coalesce_labels=matching_join_labels,
19311931
suffixes=suffixes,
19321932
)
1933-
# Constructs default index
1934-
offset_index_id = guid.generate_guid()
1935-
expr = joined_expr.promote_offsets(offset_index_id)
1936-
return Block(expr, index_columns=[offset_index_id], column_labels=labels)
1933+
1934+
# Construct a default index only if this object and the other both have
1935+
# indexes. In other words, joining anything to a NULL index object
1936+
# keeps everything as a NULL index.
1937+
#
1938+
# This keeps us from generating an index if the user joins a large
1939+
# BigQuery table against small local data, for example.
1940+
if len(self._index_columns) > 0 and len(other._index_columns) > 0:
1941+
offset_index_id = guid.generate_guid()
1942+
expr = joined_expr.promote_offsets(offset_index_id)
1943+
index_columns = [offset_index_id]
1944+
else:
1945+
expr = joined_expr
1946+
index_columns = []
1947+
1948+
return Block(expr, index_columns=index_columns, column_labels=labels)
19371949

19381950
def join(
19391951
self,

bigframes/exceptions.py

+4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ class PreviewWarning(Warning):
3939
"""The feature is in preview."""
4040

4141

42+
class NullIndexPreviewWarning(PreviewWarning):
43+
"""Null index feature is in preview."""
44+
45+
4246
class NullIndexError(ValueError):
4347
"""Object has no index."""
4448

0 commit comments

Comments
 (0)