14
14
15
15
from __future__ import annotations
16
16
17
- from dataclasses import dataclass , field
17
+ from dataclasses import dataclass , field , fields
18
18
import functools
19
19
import typing
20
20
from typing import Optional , Tuple
@@ -66,6 +66,13 @@ def session(self):
66
66
return sessions [0 ]
67
67
return None
68
68
69
+ # BigFrameNode trees can be very deep, so it's important to avoid recalculating the hash from scratch.
70
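+ # Each subclass of BigFrameNode should use this property to implement __hash__ explicitly, because @dataclass generates a fresh __hash__ for any frozen subclass that does not define its own.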
71
+ # The default dataclass-generated __hash__ method is not cached
72
+ @functools .cached_property
73
+ def _node_hash (self ):
74
+ return hash (tuple (hash (getattr (self , field .name )) for field in fields (self )))
75
+
69
76
70
77
@dataclass (frozen = True )
71
78
class UnaryNode (BigFrameNode ):
@@ -95,6 +102,9 @@ class JoinNode(BigFrameNode):
95
102
def child_nodes (self ) -> typing .Sequence [BigFrameNode ]:
96
103
return (self .left_child , self .right_child )
97
104
105
+ def __hash__ (self ):
106
+ return self ._node_hash
107
+
98
108
99
109
@dataclass (frozen = True )
100
110
class ConcatNode (BigFrameNode ):
@@ -104,13 +114,19 @@ class ConcatNode(BigFrameNode):
104
114
def child_nodes (self ) -> typing .Sequence [BigFrameNode ]:
105
115
return self .children
106
116
117
+ def __hash__ (self ):
118
+ return self ._node_hash
119
+
107
120
108
121
# Input Nodes
109
122
@dataclass (frozen = True )
110
123
class ReadLocalNode (BigFrameNode ):
111
124
feather_bytes : bytes
112
125
column_ids : typing .Tuple [str , ...]
113
126
127
+ def __hash__ (self ):
128
+ return self ._node_hash
129
+
114
130
115
131
# TODO: Refactor to take raw gbq object reference
116
132
@dataclass (frozen = True )
@@ -125,45 +141,70 @@ class ReadGbqNode(BigFrameNode):
125
141
def session (self ):
126
142
return (self .table_session ,)
127
143
144
+ def __hash__ (self ):
145
+ return self ._node_hash
146
+
128
147
129
148
# Unary nodes
130
149
@dataclass (frozen = True )
131
150
class DropColumnsNode (UnaryNode ):
132
151
columns : Tuple [str , ...]
133
152
153
+ def __hash__ (self ):
154
+ return self ._node_hash
155
+
134
156
135
157
@dataclass (frozen = True )
136
158
class PromoteOffsetsNode (UnaryNode ):
137
159
col_id : str
138
160
161
+ def __hash__ (self ):
162
+ return self ._node_hash
163
+
139
164
140
165
@dataclass (frozen = True )
141
166
class FilterNode (UnaryNode ):
142
167
predicate_id : str
143
168
keep_null : bool = False
144
169
170
+ def __hash__ (self ):
171
+ return self ._node_hash
172
+
145
173
146
174
@dataclass (frozen = True )
147
175
class OrderByNode (UnaryNode ):
148
176
by : Tuple [OrderingColumnReference , ...]
149
177
178
+ def __hash__ (self ):
179
+ return self ._node_hash
180
+
150
181
151
182
@dataclass (frozen = True )
152
183
class ReversedNode (UnaryNode ):
153
- pass
184
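+ # Otherwise-unused field: makes this node's hash differ from nodes that have the same fields, since _node_hash covers only field values and not the class.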
185
+ reversed : bool = True
186
+
187
+ def __hash__ (self ):
188
+ return self ._node_hash
154
189
155
190
156
191
@dataclass (frozen = True )
157
192
class SelectNode (UnaryNode ):
158
193
column_ids : typing .Tuple [str , ...]
159
194
195
+ def __hash__ (self ):
196
+ return self ._node_hash
197
+
160
198
161
199
@dataclass (frozen = True )
162
200
class ProjectUnaryOpNode (UnaryNode ):
163
201
input_id : str
164
202
op : ops .UnaryOp
165
203
output_id : Optional [str ] = None
166
204
205
+ def __hash__ (self ):
206
+ return self ._node_hash
207
+
167
208
168
209
@dataclass (frozen = True )
169
210
class ProjectBinaryOpNode (UnaryNode ):
@@ -172,6 +213,9 @@ class ProjectBinaryOpNode(UnaryNode):
172
213
op : ops .BinaryOp
173
214
output_id : str
174
215
216
+ def __hash__ (self ):
217
+ return self ._node_hash
218
+
175
219
176
220
@dataclass (frozen = True )
177
221
class ProjectTernaryOpNode (UnaryNode ):
@@ -181,19 +225,28 @@ class ProjectTernaryOpNode(UnaryNode):
181
225
op : ops .TernaryOp
182
226
output_id : str
183
227
228
+ def __hash__ (self ):
229
+ return self ._node_hash
230
+
184
231
185
232
@dataclass (frozen = True )
186
233
class AggregateNode (UnaryNode ):
187
234
aggregations : typing .Tuple [typing .Tuple [str , agg_ops .AggregateOp , str ], ...]
188
235
by_column_ids : typing .Tuple [str , ...] = tuple ([])
189
236
dropna : bool = True
190
237
238
+ def __hash__ (self ):
239
+ return self ._node_hash
240
+
191
241
192
242
# TODO: Unify into aggregate
193
243
@dataclass (frozen = True )
194
244
class CorrNode (UnaryNode ):
195
245
corr_aggregations : typing .Tuple [typing .Tuple [str , str , str ], ...]
196
246
247
+ def __hash__ (self ):
248
+ return self ._node_hash
249
+
197
250
198
251
@dataclass (frozen = True )
199
252
class WindowOpNode (UnaryNode ):
@@ -204,10 +257,14 @@ class WindowOpNode(UnaryNode):
204
257
never_skip_nulls : bool = False
205
258
skip_reproject_unsafe : bool = False
206
259
260
+ def __hash__ (self ):
261
+ return self ._node_hash
262
+
207
263
208
264
@dataclass (frozen = True )
209
265
class ReprojectOpNode (UnaryNode ):
210
- pass
266
+ def __hash__ (self ):
267
+ return self ._node_hash
211
268
212
269
213
270
@dataclass (frozen = True )
@@ -223,19 +280,28 @@ class UnpivotNode(UnaryNode):
223
280
] = (pandas .Float64Dtype (),)
224
281
how : typing .Literal ["left" , "right" ] = "left"
225
282
283
+ def __hash__ (self ):
284
+ return self ._node_hash
285
+
226
286
227
287
@dataclass (frozen = True )
228
288
class AssignNode (UnaryNode ):
229
289
source_id : str
230
290
destination_id : str
231
291
292
+ def __hash__ (self ):
293
+ return self ._node_hash
294
+
232
295
233
296
@dataclass (frozen = True )
234
297
class AssignConstantNode (UnaryNode ):
235
298
destination_id : str
236
299
value : typing .Hashable
237
300
dtype : typing .Optional [bigframes .dtypes .Dtype ]
238
301
302
+ def __hash__ (self ):
303
+ return self ._node_hash
304
+
239
305
240
306
@dataclass (frozen = True )
241
307
class RandomSampleNode (UnaryNode ):
@@ -244,3 +310,6 @@ class RandomSampleNode(UnaryNode):
244
310
@property
245
311
def deterministic (self ) -> bool :
246
312
return False
313
+
314
+ def __hash__ (self ):
315
+ return self ._node_hash
0 commit comments