15
15
from __future__ import annotations
16
16
17
17
import typing
18
- from typing import Tuple
18
+ from typing import Tuple , Union
19
19
20
20
import ibis
21
21
import pandas as pd
29
29
import bigframes .series
30
30
31
31
if typing .TYPE_CHECKING :
32
- LocSingleKey = typing .Union [bigframes .series .Series , indexes .Index , slice ]
32
+ LocSingleKey = Union [
33
+ bigframes .series .Series , indexes .Index , slice , bigframes .core .scalar .Scalar
34
+ ]
33
35
34
36
35
37
class LocSeriesIndexer :
36
38
def __init__ (self , series : bigframes .series .Series ):
37
39
self ._series = series
38
40
39
- def __getitem__ (self , key ) -> bigframes .series .Series :
40
- """
41
- Only indexing by a boolean bigframes.series.Series or list of index entries is currently supported
42
- """
43
- return typing .cast (
44
- bigframes .series .Series , _loc_getitem_series_or_dataframe (self ._series , key )
45
- )
41
+ def __getitem__ (
42
+ self , key
43
+ ) -> Union [bigframes .core .scalar .Scalar , bigframes .series .Series ]:
44
+ return _loc_getitem_series_or_dataframe (self ._series , key )
46
45
47
46
def __setitem__ (self , key , value ) -> None :
48
47
# TODO(swast): support MultiIndex
@@ -84,7 +83,7 @@ def __init__(self, series: bigframes.series.Series):
84
83
85
84
def __getitem__ (
86
85
self , key
87
- ) -> bigframes .core .scalar .Scalar | bigframes .series .Series :
86
+ ) -> Union [ bigframes .core .scalar .Scalar , bigframes .series .Series ] :
88
87
"""
89
88
Index series using integer offsets. Currently supports index by key type:
90
89
@@ -103,13 +102,17 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
103
102
self ._dataframe = dataframe
104
103
105
104
@typing .overload
106
- def __getitem__ (self , key : LocSingleKey ) -> bigframes .dataframe .DataFrame :
105
+ def __getitem__ (
106
+ self , key : LocSingleKey
107
+ ) -> Union [bigframes .dataframe .DataFrame , pd .Series ]:
107
108
...
108
109
109
110
# Technically this is wrong since we can have duplicate column labels, but
110
111
# this is expected to be rare.
111
112
@typing .overload
112
- def __getitem__ (self , key : Tuple [LocSingleKey , str ]) -> bigframes .series .Series :
113
+ def __getitem__ (
114
+ self , key : Tuple [LocSingleKey , str ]
115
+ ) -> Union [bigframes .series .Series , bigframes .core .scalar .Scalar ]:
113
116
...
114
117
115
118
def __getitem__ (self , key ):
@@ -173,7 +176,7 @@ class ILocDataFrameIndexer:
173
176
def __init__ (self , dataframe : bigframes .dataframe .DataFrame ):
174
177
self ._dataframe = dataframe
175
178
176
- def __getitem__ (self , key ) -> bigframes .dataframe .DataFrame | pd .Series :
179
+ def __getitem__ (self , key ) -> Union [ bigframes .dataframe .DataFrame , pd .Series ] :
177
180
"""
178
181
Index dataframe using integer offsets. Currently supports index by key type:
179
182
@@ -188,21 +191,26 @@ def __getitem__(self, key) -> bigframes.dataframe.DataFrame | pd.Series:
188
191
@typing .overload
189
192
def _loc_getitem_series_or_dataframe (
190
193
series_or_dataframe : bigframes .series .Series , key
191
- ) -> bigframes .series .Series :
194
+ ) -> Union [ bigframes .core . scalar . Scalar , bigframes . series .Series ] :
192
195
...
193
196
194
197
195
198
@typing .overload
196
199
def _loc_getitem_series_or_dataframe (
197
200
series_or_dataframe : bigframes .dataframe .DataFrame , key
198
- ) -> bigframes .dataframe .DataFrame :
201
+ ) -> Union [ bigframes .dataframe .DataFrame , pd . Series ] :
199
202
...
200
203
201
204
202
205
def _loc_getitem_series_or_dataframe (
203
- series_or_dataframe : bigframes .dataframe .DataFrame | bigframes .series .Series ,
206
+ series_or_dataframe : Union [ bigframes .dataframe .DataFrame , bigframes .series .Series ] ,
204
207
key : LocSingleKey ,
205
- ) -> bigframes .dataframe .DataFrame | bigframes .series .Series :
208
+ ) -> Union [
209
+ bigframes .dataframe .DataFrame ,
210
+ bigframes .series .Series ,
211
+ pd .Series ,
212
+ bigframes .core .scalar .Scalar ,
213
+ ]:
206
214
if isinstance (key , bigframes .series .Series ) and key .dtype == "boolean" :
207
215
return series_or_dataframe [key ]
208
216
elif isinstance (key , bigframes .series .Series ):
@@ -222,7 +230,7 @@ def _loc_getitem_series_or_dataframe(
222
230
# TODO(henryjsolberg): support MultiIndex
223
231
if len (key ) == 0 : # type: ignore
224
232
return typing .cast (
225
- typing . Union [bigframes .dataframe .DataFrame , bigframes .series .Series ],
233
+ Union [bigframes .dataframe .DataFrame , bigframes .series .Series ],
226
234
series_or_dataframe .iloc [0 :0 ],
227
235
)
228
236
@@ -258,11 +266,22 @@ def _loc_getitem_series_or_dataframe(
258
266
)
259
267
keys_df = keys_df .set_index (index_name , drop = True )
260
268
keys_df .index .name = None
261
- return _perform_loc_list_join (series_or_dataframe , keys_df )
269
+ result = _perform_loc_list_join (series_or_dataframe , keys_df )
270
+ pandas_result = result .to_pandas ()
271
+ # Although loc[scalar_key] can return multiple rows when scalar_key
272
+ # is not unique, we download the result here and, when it contains
273
+ # exactly one row, return that individual result (as a scalar or a
274
+ # pandas Series), since we expect unique index keys to be more common.
275
+ # Use loc[[scalar_key]] to retrieve a one-item DataFrame or Series.
276
+ if len (pandas_result ) == 1 :
277
+ return pandas_result .iloc [0 ]
278
+ # Otherwise (the result does not have exactly one row, e.g. the key is
279
+ # not unique), return a bigframes object, as usual for methods that
+ # return DataFrames/Series.
280
+ return result
262
281
else :
263
282
raise TypeError (
264
- "Invalid argument type. loc currently only supports indexing with a "
265
- "boolean bigframes Series, a list of index entries or a single index entry . "
283
+ "Invalid argument type. Expected bigframes.Series, bigframes.Index, "
284
+ "list, : (empty slice), or scalar . "
266
285
f"{ constants .FEEDBACK_LINK } "
267
286
)
268
287
@@ -284,9 +303,9 @@ def _perform_loc_list_join(
284
303
285
304
286
305
def _perform_loc_list_join (
287
- series_or_dataframe : bigframes .dataframe .DataFrame | bigframes .series .Series ,
306
+ series_or_dataframe : Union [ bigframes .dataframe .DataFrame , bigframes .series .Series ] ,
288
307
keys_df : bigframes .dataframe .DataFrame ,
289
- ) -> bigframes .series .Series | bigframes .dataframe .DataFrame :
308
+ ) -> Union [ bigframes .series .Series , bigframes .dataframe .DataFrame ] :
290
309
# right join based on the old index so that the matching rows from the user's
291
310
# original dataframe will be duplicated and reordered appropriately
292
311
original_index_names = series_or_dataframe .index .names
@@ -309,20 +328,26 @@ def _perform_loc_list_join(
309
328
@typing .overload
310
329
def _iloc_getitem_series_or_dataframe (
311
330
series_or_dataframe : bigframes .series .Series , key
312
- ) -> bigframes .series .Series | bigframes .core .scalar .Scalar :
331
+ ) -> Union [ bigframes .series .Series , bigframes .core .scalar .Scalar ] :
313
332
...
314
333
315
334
316
335
@typing .overload
317
336
def _iloc_getitem_series_or_dataframe (
318
337
series_or_dataframe : bigframes .dataframe .DataFrame , key
319
- ) -> bigframes .dataframe .DataFrame | pd .Series :
338
+ ) -> Union [ bigframes .dataframe .DataFrame , pd .Series ] :
320
339
...
321
340
322
341
323
342
def _iloc_getitem_series_or_dataframe (
324
- series_or_dataframe : bigframes .dataframe .DataFrame | bigframes .series .Series , key
325
- ) -> bigframes .dataframe .DataFrame | bigframes .series .Series | bigframes .core .scalar .Scalar | pd .Series :
343
+ series_or_dataframe : Union [bigframes .dataframe .DataFrame , bigframes .series .Series ],
344
+ key ,
345
+ ) -> Union [
346
+ bigframes .dataframe .DataFrame ,
347
+ bigframes .series .Series ,
348
+ bigframes .core .scalar .Scalar ,
349
+ pd .Series ,
350
+ ]:
326
351
if isinstance (key , int ):
327
352
internal_slice_result = series_or_dataframe ._slice (key , key + 1 , 1 )
328
353
result_pd_df = internal_slice_result .to_pandas ()
@@ -334,7 +359,7 @@ def _iloc_getitem_series_or_dataframe(
334
359
elif pd .api .types .is_list_like (key ):
335
360
if len (key ) == 0 :
336
361
return typing .cast (
337
- typing . Union [bigframes .dataframe .DataFrame , bigframes .series .Series ],
362
+ Union [bigframes .dataframe .DataFrame , bigframes .series .Series ],
338
363
series_or_dataframe .iloc [0 :0 ],
339
364
)
340
365
df = series_or_dataframe
0 commit comments