17
17
18
18
import argparse
19
19
import inspect
20
+ import pathlib
21
+ import sys
20
22
21
23
import pandas as pd
24
+ import pandas .core .groupby
25
+ import pandas .core .indexes .accessors
26
+ import pandas .core .strings .accessor
27
+ import pandas .core .window .rolling
22
28
29
+ import bigframes
30
+ import bigframes .core .groupby
31
+ import bigframes .core .window
32
+ import bigframes .operations .datetimes
23
33
import bigframes .pandas as bpd
24
34
35
# Parent of the directory that contains this script; generated docs are
# written relative to this location.
REPO_ROOT = pathlib.Path(__file__).parent.parent

# Maps an API prefix (the first element of each PANDAS_TARGETS entry) to the
# URL prefix of the corresponding reference page.  format_api() appends the
# member name to build the anchor link for an implemented API.  Prefixes
# without an entry here are rendered without a link.
URL_PREFIX = {
    "pandas": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.pandas#bigframes_pandas_"
    ),
    "dataframe": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.dataframe.DataFrame#bigframes_dataframe_DataFrame_"
    ),
    "dataframegroupby": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.groupby.DataFrameGroupBy#bigframes_core_groupby_DataFrameGroupBy_"
    ),
    "series": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.series.Series#bigframes_series_Series_"
    ),
    "seriesgroupby": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.groupby.SeriesGroupBy#bigframes_core_groupby_SeriesGroupBy_"
    ),
    "datetimemethods": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.operations.datetimes.DatetimeMethods#bigframes_operations_datetimes_DatetimeMethods_"
    ),
    "stringmethods": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.operations.strings.StringMethods#bigframes_operations_strings_StringMethods_"
    ),
    "window": (
        "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.window.Window#bigframes_core_window_Window_"
    ),
    # TODO: Index not documented.
}
64
+
65
+
66
# Each entry is (api prefix, pandas object, bigframes object).  The members of
# the pandas object are enumerated and looked up on the bigframes object to
# decide whether, and how completely, each API is implemented.
PANDAS_TARGETS = [
    ("pandas", pd, bpd),
    ("dataframe", pd.DataFrame, bpd.DataFrame),
    (
        "dataframegroupby",
        pandas.core.groupby.DataFrameGroupBy,
        bigframes.core.groupby.DataFrameGroupBy,
    ),
    ("series", pd.Series, bpd.Series),
    (
        "seriesgroupby",
        # Compare the Series flavour on both sides; using DataFrameGroupBy
        # here would just duplicate the "dataframegroupby" results.
        pandas.core.groupby.SeriesGroupBy,
        bigframes.core.groupby.SeriesGroupBy,
    ),
    (
        "datetimemethods",
        pandas.core.indexes.accessors.CombinedDatetimelikeProperties,
        bigframes.operations.datetimes.DatetimeMethods,
    ),
    (
        "stringmethods",
        pandas.core.strings.accessor.StringMethods,
        bigframes.operations.strings.StringMethods,
    ),
    (
        "window",
        pandas.core.window.rolling.Rolling,
        bigframes.core.window.Window,
    ),
    ("index", pd.Index, bpd.Index),
]
97
+
98
+
99
def names_from_signature(signature):
    """Extract the names of parameters from signature

    See: https://docs.python.org/3/library/inspect.html#inspect.signature

    Args:
        signature: An object exposing a ``parameters`` mapping keyed by
            parameter name, such as the result of ``inspect.signature``.

    Returns:
        A frozenset of the parameter names.
    """
    # Iterating the mapping yields its keys, i.e. the parameter names.
    return frozenset(signature.parameters)
105
+
106
+
107
def calculate_missing_parameters(bigframes_function, target_function):
    """Return the parameter names of ``target_function`` that are absent
    from the signature of ``bigframes_function``.

    Both arguments are inspected with ``inspect.signature``; the result is
    the set difference of their parameter names.
    """
    # The bigframes signature is inspected first, then the target one.
    supported, expected = (
        names_from_signature(inspect.signature(function))
        for function in (bigframes_function, target_function)
    )
    return expected - supported
111
+
25
112
26
113
def generate_pandas_api_coverage ():
27
114
"""Inspect all our pandas objects, and compare with the real pandas objects, to see
28
115
which methods we implement. For each, generate a regex that can be used to check if
29
116
it's present in a notebook"""
30
- header = ["api" , "pattern" , "kind" , "is_in_bigframes" ]
117
+ header = ["api" , "pattern" , "kind" , "is_in_bigframes" , "missing_parameters" ]
31
118
api_patterns = []
32
- targets = [
33
- ("pandas" , pd , bpd ),
34
- ("dataframe" , pd .DataFrame , bpd .DataFrame ),
35
- ("series" , pd .Series , bpd .Series ),
36
- ("index" , pd .Index , bpd .Index ),
37
- ]
38
119
indexers = ["loc" , "iloc" , "iat" , "ix" , "at" ]
39
- for name , pandas_obj , bigframes_obj in targets :
120
+ for name , pandas_obj , bigframes_obj in PANDAS_TARGETS :
40
121
for member in dir (pandas_obj ):
122
+ missing_parameters = ""
123
+
41
124
# skip private functions and properties
42
125
if member [0 ] == "_" and member [1 ] != "_" :
43
126
continue
@@ -50,6 +133,17 @@ def generate_pandas_api_coverage():
50
133
# Function, match .member(
51
134
token = f"\\ .{ member } \\ ("
52
135
token_type = "function"
136
+
137
+ if hasattr (bigframes_obj , member ):
138
+ bigframes_function = getattr (bigframes_obj , member )
139
+ pandas_function = getattr (pandas_obj , member )
140
+ missing_parameters = ", " .join (
141
+ sorted (
142
+ calculate_missing_parameters (
143
+ bigframes_function , pandas_function
144
+ )
145
+ )
146
+ )
53
147
elif member in indexers :
54
148
# Indexer, match .indexer[
55
149
token = f"\\ .{ member } \\ ["
@@ -62,7 +156,13 @@ def generate_pandas_api_coverage():
62
156
is_in_bigframes = hasattr (bigframes_obj , member )
63
157
64
158
api_patterns .append (
65
- [f"{ name } .{ member } " , token , token_type , is_in_bigframes ]
159
+ [
160
+ f"{ name } .{ member } " ,
161
+ token ,
162
+ token_type ,
163
+ is_in_bigframes ,
164
+ missing_parameters ,
165
+ ]
66
166
)
67
167
68
168
return pd .DataFrame (api_patterns , columns = header )
@@ -165,14 +265,112 @@ def build_api_coverage_table(bigframes_version: str, release_version: str):
165
265
return combined_df .infer_objects ().convert_dtypes ()
166
266
167
267
268
def format_api(api_names, is_in_bigframes, api_prefix):
    """Render API names as HTML snippets.

    The leading ``"<api_prefix>."`` is removed from every name and the
    remainder is wrapped in ``<code>`` tags.  When ``URL_PREFIX`` has an entry
    for ``api_prefix``, the rows flagged by ``is_in_bigframes`` are
    additionally wrapped in a link built from that URL prefix and the short
    name; all other rows keep the plain ``<code>`` form.
    """
    prefix_length = len(f"{api_prefix}.")
    short_names = api_names.str.slice(start=prefix_length)
    code_html = "<code>" + short_names + "</code>"

    base_url = URL_PREFIX.get(api_prefix)
    if base_url is not None:
        anchor_html = (
            '<a href="' + base_url + short_names + '">' + code_html + "</a>"
        )
        # Only rows that exist in bigframes get the linked variant.
        return code_html.mask(is_in_bigframes, anchor_html)

    # No documentation page is known for this prefix: leave names unlinked.
    return code_html
277
+
278
+
279
def generate_api_coverage(df, api_prefix):
    """Build the coverage table for every API whose name starts with
    ``"<api_prefix>."``.

    The result has three columns: ``API`` (HTML from ``format_api``),
    ``Implemented`` and ``Missing parameters``.  ``Implemented`` is ``"Y"``
    when the API is in bigframes with no missing parameters, ``"P"`` when it
    is in bigframes but some parameters are missing, and ``"N"`` when it is
    not in bigframes.
    """
    selected = df.loc[df["api"].str.startswith(f"{api_prefix}.")]
    missing_length = selected["missing_parameters"].str.len()

    # (status label, row mask) pairs, applied in this order below.
    status_masks = (
        ("Y", (missing_length == 0) & selected["is_in_bigframes"]),
        ("P", (missing_length != 0) & selected["is_in_bigframes"]),
        ("N", ~selected["is_in_bigframes"]),
    )

    coverage = pd.DataFrame(
        {
            "API": format_api(
                selected["api"],
                selected["is_in_bigframes"],
                api_prefix,
            ),
            "Implemented": "",
            "Missing parameters": selected["missing_parameters"],
        }
    )
    for label, mask in status_masks:
        coverage.loc[mask, "Implemented"] = label
    return coverage
303
+
304
+
305
def generate_api_coverage_doc(df, api_prefix):
    """Write the HTML coverage table for ``api_prefix``.

    APIs that are not implemented (``"N"``) are dropped; the remaining
    statuses are rendered as bold ``Y`` / italic ``P``.  The table is written
    to ``docs/supported_pandas_apis/bf_<api_prefix>.html`` under
    ``REPO_ROOT``.

    Args:
        df: The full API coverage table.
        api_prefix: Prefix selecting which group of APIs to document.
    """
    coverage = generate_api_coverage(df, api_prefix)
    # Take an explicit copy of the filtered rows so the column assignment
    # below operates on an independent frame (no SettingWithCopyWarning).
    coverage = coverage.loc[coverage["Implemented"] != "N"].copy()
    coverage["Implemented"] = coverage["Implemented"].map(
        {
            "Y": "<b>Y</b>",
            "P": "<i>P</i>",
        }
    )

    output_path = (
        REPO_ROOT / "docs" / "supported_pandas_apis" / f"bf_{api_prefix}.html"
    )
    # Explicit encoding keeps the generated file independent of the locale.
    with open(output_path, "w", encoding="utf-8") as html_file:
        coverage.to_html(
            html_file,
            index=False,
            header=True,
            escape=False,  # cells already contain HTML markup
            border=0,
            col_space="8em",
        )
322
+
323
+
324
def generate_api_coverage_docs(df):
    """Generate one coverage document per entry in ``PANDAS_TARGETS``."""
    # Only the prefix (first tuple element) of each target is needed here.
    for api_prefix, *_ in PANDAS_TARGETS:
        generate_api_coverage_doc(df, api_prefix)
328
+
329
+
330
def print_api_coverage_summary(df, api_prefix):
    """Print the prefix, the number of APIs per ``Implemented`` status, and
    the total number of APIs for ``api_prefix``.
    """
    coverage = generate_api_coverage(df, api_prefix)

    print(api_prefix)
    counts_by_status = (
        coverage[["Implemented", "API"]].groupby(["Implemented"]).count()
    )
    print(counts_by_status)
    total = coverage.shape[0]
    print(f"{api_prefix} APIs: {total}\n")
336
+
337
+
338
def print_api_coverage_summaries(df):
    """Print a summary for every target in ``PANDAS_TARGETS``, followed by
    overall totals of fully (Y), partially (P) and not (N) implemented APIs.
    """
    for api_prefix, *_ in PANDAS_TARGETS:
        print_api_coverage_summary(df, api_prefix)

    print(f"\nAll APIs: {len(df.index)}")

    missing_length = df["missing_parameters"].str.len()
    # (label, row mask) pairs, reported in this order.
    totals = (
        ("Y", (missing_length == 0) & df["is_in_bigframes"]),
        ("P", (missing_length != 0) & df["is_in_bigframes"]),
        ("N", ~df["is_in_bigframes"]),
    )
    for label, mask in totals:
        print(f"{label}: {mask.sum()}")
354
+
355
+
168
356
def main():
    """Command-line entry point.

    Parses the arguments, builds the API coverage table, and then, depending
    on the positional ``output_type``:

    * ``bigquery``: appends the table to ``--bigquery_table_name``;
    * ``docs``: writes the HTML coverage documents;
    * ``summary``: prints coverage counts.

    Any other ``output_type`` terminates the process with exit status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("output_type")
    parser.add_argument("--bigframes_version", default=bigframes.__version__)
    parser.add_argument("--release_version", default="")
    parser.add_argument("--bigquery_table_name")
    args = parser.parse_args()

    # Reject an unknown output type before building the coverage table, and
    # report it on stderr (sys.exit with a string prints there, status 1).
    if args.output_type not in ("bigquery", "docs", "summary"):
        sys.exit(f"Unexpected output_type {repr(args.output_type)}")

    df = build_api_coverage_table(args.bigframes_version, args.release_version)

    if args.output_type == "bigquery":
        df.to_gbq(args.bigquery_table_name, if_exists="append")
    elif args.output_type == "docs":
        generate_api_coverage_docs(df)
    else:  # "summary"
        print_api_coverage_summaries(df)
176
374
177
375
178
376
if __name__ == "__main__" :
0 commit comments