Skip to content

Commit 7ab65e8

Browse files
fix: generate unique ids on join to avoid id collisions (#65)
* fix: generate unique ids on join to avoid id collisions
1 parent 61200bd commit 7ab65e8

File tree

1 file changed

+31
-49
lines changed

1 file changed

+31
-49
lines changed

bigframes/core/joins/single_column.py

+31-49
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from __future__ import annotations
1818

19+
import itertools
1920
import typing
2021
from typing import Callable, Literal, Tuple
2122

@@ -25,7 +26,7 @@
2526

2627
import bigframes.constants as constants
2728
import bigframes.core as core
28-
import bigframes.core.guid
29+
import bigframes.core.guid as guid
2930
import bigframes.core.joins.row_identity
3031
import bigframes.core.ordering
3132

@@ -122,17 +123,38 @@ def join_by_column(
122123
),
123124
)
124125
else:
126+
lmapping = {
127+
col_id: guid.generate_guid()
128+
for col_id in itertools.chain(
129+
left.column_names, left._hidden_ordering_column_names
130+
)
131+
}
132+
rmapping = {
133+
col_id: guid.generate_guid()
134+
for col_id in itertools.chain(
135+
right.column_names, right._hidden_ordering_column_names
136+
)
137+
}
138+
139+
def get_column_left(col_id):
140+
return lmapping[col_id]
141+
142+
def get_column_right(col_id):
143+
return rmapping[col_id]
144+
125145
left_table = left._to_ibis_expr(
126146
ordering_mode="unordered",
127147
expose_hidden_cols=True,
148+
col_id_overrides=lmapping,
128149
)
129150
right_table = right._to_ibis_expr(
130151
ordering_mode="unordered",
131152
expose_hidden_cols=True,
153+
col_id_overrides=rmapping,
132154
)
133155
join_conditions = [
134-
value_to_join_key(left_table[left_index])
135-
== value_to_join_key(right_table[right_index])
156+
value_to_join_key(left_table[lmapping[left_index]])
157+
== value_to_join_key(right_table[rmapping[right_index]])
136158
for left_index, right_index in zip(left_column_ids, right_column_ids)
137159
]
138160

@@ -145,38 +167,6 @@ def join_by_column(
145167
rname="{name}_y",
146168
)
147169

148-
def get_column_left(key: str) -> str:
149-
if (
150-
how == "inner"
151-
and key in left_column_ids
152-
and key in combined_table.columns
153-
):
154-
# Ibis doesn't rename the column if the values are guaranteed
155-
# to be equal on left and right (because they're part of an
156-
# inner join condition). See:
157-
# https://github.com/ibis-project/ibis/pull/4651
158-
pass
159-
elif key in right_table.columns:
160-
key = f"{key}_x"
161-
162-
return key
163-
164-
def get_column_right(key: str) -> str:
165-
if (
166-
how == "inner"
167-
and key in right_column_ids
168-
and key in combined_table.columns
169-
):
170-
# Ibis doesn't rename the column if the values are guaranteed
171-
# to be equal on left and right (because they're part of an
172-
# inner join condition). See:
173-
# https://github.com/ibis-project/ibis/pull/4651
174-
pass
175-
elif key in left_table.columns:
176-
key = f"{key}_y"
177-
178-
return key
179-
180170
# Preserve ordering across joins.
181171
ordering = join_orderings(
182172
left._ordering,
@@ -245,20 +235,14 @@ def get_join_cols(
245235
join_key_cols: list[ibis_types.Value] = []
246236
for left_col, right_col in zip(left_join_cols, right_join_cols):
247237
if not coalesce_join_keys:
248-
join_key_cols.append(
249-
left_col.name(bigframes.core.guid.generate_guid(prefix="index_"))
250-
)
251-
join_key_cols.append(
252-
right_col.name(bigframes.core.guid.generate_guid(prefix="index_"))
253-
)
238+
join_key_cols.append(left_col.name(guid.generate_guid(prefix="index_")))
239+
join_key_cols.append(right_col.name(guid.generate_guid(prefix="index_")))
254240
else:
255241
if how == "left" or how == "inner":
256-
join_key_cols.append(
257-
left_col.name(bigframes.core.guid.generate_guid(prefix="index_"))
258-
)
242+
join_key_cols.append(left_col.name(guid.generate_guid(prefix="index_")))
259243
elif how == "right":
260244
join_key_cols.append(
261-
right_col.name(bigframes.core.guid.generate_guid(prefix="index_"))
245+
right_col.name(guid.generate_guid(prefix="index_"))
262246
)
263247
elif how == "outer":
264248
# The left index and the right index might contain null values, for
@@ -269,16 +253,14 @@ def get_join_cols(
269253
# Don't need to coalesce if they are exactly the same column.
270254
if left_col.name("index").equals(right_col.name("index")):
271255
join_key_cols.append(
272-
left_col.name(
273-
bigframes.core.guid.generate_guid(prefix="index_")
274-
)
256+
left_col.name(guid.generate_guid(prefix="index_"))
275257
)
276258
else:
277259
join_key_cols.append(
278260
ibis.coalesce(
279261
left_col,
280262
right_col,
281-
).name(bigframes.core.guid.generate_guid(prefix="index_"))
263+
).name(guid.generate_guid(prefix="index_"))
282264
)
283265
else:
284266
raise ValueError(

0 commit comments

Comments
 (0)