Skip to content

Commit 74c3915

Browse files
authored
docs: add "Supported pandas APIs" reference to the documentation (#542)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) 🦕
1 parent 9d8cf67 commit 74c3915

File tree

8 files changed

+291
-12
lines changed

8 files changed

+291
-12
lines changed

.kokoro/release-nightly.sh

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ for gcs_path in gs://vertex_sdk_private_releases/bigframe/ \
106106
# write access to
107107
COVERAGE_TABLE=bigframes-metrics.coverage_report.bigframes_coverage_nightly
108108
python3.10 scripts/publish_api_coverage.py \
109+
bigquery \
109110
--bigframes_version=$BIGFRAMES_VERSION \
110111
--release_version=$RELEASE_VERSION \
111112
--bigquery_table=$COVERAGE_TABLE

docs/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ API reference
77
:maxdepth: 3
88

99
reference/index
10+
supported_pandas_apis
1011

1112
Changelog
1213
---------

docs/supported_pandas_apis.rst

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
Supported pandas APIs
2+
=====================
3+
4+
The following tables show the pandas APIs that have been implemented (or not)
5+
in BigQuery DataFrames.
6+
7+
* 'Y' means it implements all parameters.
8+
* 'P' means it implements only some parameters.
9+
10+
DataFrame
11+
---------
12+
13+
.. raw:: html
14+
:file: supported_pandas_apis/bf_dataframe.html
15+
16+
DataFrameGroupBy
17+
----------------
18+
19+
.. raw:: html
20+
:file: supported_pandas_apis/bf_dataframegroupby.html
21+
22+
Index
23+
-----
24+
25+
.. raw:: html
26+
:file: supported_pandas_apis/bf_index.html
27+
28+
pandas module
29+
-------------
30+
31+
.. raw:: html
32+
:file: supported_pandas_apis/bf_pandas.html
33+
34+
Series
35+
------
36+
37+
.. raw:: html
38+
:file: supported_pandas_apis/bf_series.html
39+
40+
Series.dt methods
41+
-----------------
42+
43+
.. raw:: html
44+
:file: supported_pandas_apis/bf_datetimemethods.html
45+
46+
Series.str methods
47+
------------------
48+
49+
.. raw:: html
50+
:file: supported_pandas_apis/bf_stringmethods.html
51+
52+
SeriesGroupBy
53+
-------------
54+
55+
.. raw:: html
56+
:file: supported_pandas_apis/bf_seriesgroupby.html
57+
58+
Window
59+
------
60+
61+
.. raw:: html
62+
:file: supported_pandas_apis/bf_window.html

docs/supported_pandas_apis/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.html

docs/templates/toc.yml

+2
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@
7272
name: Series
7373
- name: Window
7474
uid: bigframes.core.window.Window
75+
- href: supported_pandas_apis.html
76+
name: Supported pandas APIs
7577
name: bigframes.pandas
7678
- items:
7779
- items:

noxfile.py

+12
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,12 @@ def docs(session):
467467
)
468468

469469
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
470+
471+
session.run(
472+
"python",
473+
"scripts/publish_api_coverage.py",
474+
"docs",
475+
)
470476
session.run(
471477
"sphinx-build",
472478
"-W", # warnings as errors
@@ -503,6 +509,12 @@ def docfx(session):
503509
)
504510

505511
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
512+
513+
session.run(
514+
"python",
515+
"scripts/publish_api_coverage.py",
516+
"docs",
517+
)
506518
session.run(
507519
"sphinx-build",
508520
"-T", # show full traceback on exception

scripts/publish_api_coverage.py

+210-12
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,110 @@
1717

1818
import argparse
1919
import inspect
20+
import pathlib
21+
import sys
2022

2123
import pandas as pd
24+
import pandas.core.groupby
25+
import pandas.core.indexes.accessors
26+
import pandas.core.strings.accessor
27+
import pandas.core.window.rolling
2228

29+
import bigframes
30+
import bigframes.core.groupby
31+
import bigframes.core.window
32+
import bigframes.operations.datetimes
2333
import bigframes.pandas as bpd
2434

35+
REPO_ROOT = pathlib.Path(__file__).parent.parent
36+
37+
# Root of the published BigQuery DataFrames API reference. The links must point
# directly at cloud.google.com rather than through any intermediate proxy host.
_REFERENCE_ROOT = "https://cloud.google.com/python/docs/reference/bigframes/latest/"

# Maps an API prefix (the first element of each PANDAS_TARGETS entry) to the URL
# prefix of its reference page. Appending a member name to the prefix yields the
# link to that member's documentation anchor.
URL_PREFIX = {
    "pandas": _REFERENCE_ROOT + "bigframes.pandas#bigframes_pandas_",
    "dataframe": (
        _REFERENCE_ROOT
        + "bigframes.dataframe.DataFrame#bigframes_dataframe_DataFrame_"
    ),
    "dataframegroupby": (
        _REFERENCE_ROOT
        + "bigframes.core.groupby.DataFrameGroupBy"
        + "#bigframes_core_groupby_DataFrameGroupBy_"
    ),
    "series": _REFERENCE_ROOT + "bigframes.series.Series#bigframes_series_Series_",
    "seriesgroupby": (
        _REFERENCE_ROOT
        + "bigframes.core.groupby.SeriesGroupBy"
        + "#bigframes_core_groupby_SeriesGroupBy_"
    ),
    "datetimemethods": (
        _REFERENCE_ROOT
        + "bigframes.operations.datetimes.DatetimeMethods"
        + "#bigframes_operations_datetimes_DatetimeMethods_"
    ),
    "stringmethods": (
        _REFERENCE_ROOT
        + "bigframes.operations.strings.StringMethods"
        + "#bigframes_operations_strings_StringMethods_"
    ),
    "window": (
        _REFERENCE_ROOT
        + "bigframes.core.window.Window#bigframes_core_window_Window_"
    ),
    # TODO: Index not documented.
}
64+
65+
66+
# Each entry is (api_prefix, pandas object, bigframes object). The members of the
# pandas object are compared against the bigframes object, and api_prefix is used
# both as the "<prefix>.<member>" API name and as the key into URL_PREFIX.
PANDAS_TARGETS = [
    ("pandas", pd, bpd),
    ("dataframe", pd.DataFrame, bpd.DataFrame),
    (
        "dataframegroupby",
        pandas.core.groupby.DataFrameGroupBy,
        bigframes.core.groupby.DataFrameGroupBy,
    ),
    ("series", pd.Series, bpd.Series),
    (
        # Must compare the SeriesGroupBy classes; pairing DataFrameGroupBy here
        # would just duplicate the "dataframegroupby" results under this prefix.
        "seriesgroupby",
        pandas.core.groupby.SeriesGroupBy,
        bigframes.core.groupby.SeriesGroupBy,
    ),
    (
        "datetimemethods",
        pandas.core.indexes.accessors.CombinedDatetimelikeProperties,
        bigframes.operations.datetimes.DatetimeMethods,
    ),
    (
        "stringmethods",
        pandas.core.strings.accessor.StringMethods,
        bigframes.operations.strings.StringMethods,
    ),
    (
        "window",
        pandas.core.window.rolling.Rolling,
        bigframes.core.window.Window,
    ),
    ("index", pd.Index, bpd.Index),
]
97+
98+
99+
def names_from_signature(signature):
    """Extract the names of parameters from signature.

    See: https://docs.python.org/3/library/inspect.html#inspect.signature

    Args:
        signature: An ``inspect.Signature`` object.

    Returns:
        A frozenset containing the name of every parameter in ``signature``.
    """
    # Iterating the ``parameters`` mapping yields the parameter names, so it can
    # be handed to frozenset directly without an intermediate set.
    return frozenset(signature.parameters)
105+
106+
107+
def calculate_missing_parameters(bigframes_function, target_function):
    """Return the parameter names of target_function absent from bigframes_function.

    Args:
        bigframes_function: Callable whose signature is checked for coverage.
        target_function: Callable whose signature is the reference.

    Returns:
        A frozenset of parameter names that appear in the signature of
        ``target_function`` but not in the signature of ``bigframes_function``.
    """
    implemented_names = names_from_signature(inspect.signature(bigframes_function))
    reference_names = names_from_signature(inspect.signature(target_function))
    return reference_names.difference(implemented_names)
111+
25112

26113
def generate_pandas_api_coverage():
27114
"""Inspect all our pandas objects, and compare with the real pandas objects, to see
28115
which methods we implement. For each, generate a regex that can be used to check if
29116
its present in a notebook"""
30-
header = ["api", "pattern", "kind", "is_in_bigframes"]
117+
header = ["api", "pattern", "kind", "is_in_bigframes", "missing_parameters"]
31118
api_patterns = []
32-
targets = [
33-
("pandas", pd, bpd),
34-
("dataframe", pd.DataFrame, bpd.DataFrame),
35-
("series", pd.Series, bpd.Series),
36-
("index", pd.Index, bpd.Index),
37-
]
38119
indexers = ["loc", "iloc", "iat", "ix", "at"]
39-
for name, pandas_obj, bigframes_obj in targets:
120+
for name, pandas_obj, bigframes_obj in PANDAS_TARGETS:
40121
for member in dir(pandas_obj):
122+
missing_parameters = ""
123+
41124
# skip private functions and properties
42125
if member[0] == "_" and member[1] != "_":
43126
continue
@@ -50,6 +133,17 @@ def generate_pandas_api_coverage():
50133
# Function, match .member(
51134
token = f"\\.{member}\\("
52135
token_type = "function"
136+
137+
if hasattr(bigframes_obj, member):
138+
bigframes_function = getattr(bigframes_obj, member)
139+
pandas_function = getattr(pandas_obj, member)
140+
missing_parameters = ", ".join(
141+
sorted(
142+
calculate_missing_parameters(
143+
bigframes_function, pandas_function
144+
)
145+
)
146+
)
53147
elif member in indexers:
54148
# Indexer, match .indexer[
55149
token = f"\\.{member}\\["
@@ -62,7 +156,13 @@ def generate_pandas_api_coverage():
62156
is_in_bigframes = hasattr(bigframes_obj, member)
63157

64158
api_patterns.append(
65-
[f"{name}.{member}", token, token_type, is_in_bigframes]
159+
[
160+
f"{name}.{member}",
161+
token,
162+
token_type,
163+
is_in_bigframes,
164+
missing_parameters,
165+
]
66166
)
67167

68168
return pd.DataFrame(api_patterns, columns=header)
@@ -165,14 +265,112 @@ def build_api_coverage_table(bigframes_version: str, release_version: str):
165265
return combined_df.infer_objects().convert_dtypes()
166266

167267

268+
def format_api(api_names, is_in_bigframes, api_prefix):
    """Render API names as HTML, linking the ones present in bigframes.

    Args:
        api_names: Series of strings of the form ``"<api_prefix>.<member>"``.
        is_in_bigframes: Boolean Series selecting the entries to hyperlink.
        api_prefix: Prefix to strip from each name; also the URL_PREFIX key.

    Returns:
        A Series of ``<code>`` snippets. When URL_PREFIX has an entry for
        ``api_prefix``, entries selected by ``is_in_bigframes`` are wrapped in
        an ``<a>`` element pointing at the member's reference documentation.
    """
    # Drop the leading "<api_prefix>." so only the member name remains.
    prefix_length = len(f"{api_prefix}.")
    member_names = api_names.str.slice(start=prefix_length)
    code_cells = "<code>" + member_names + "</code>"

    base_url = URL_PREFIX.get(api_prefix)
    if base_url is not None:
        anchors = '<a href="' + base_url + member_names + '">' + code_cells + "</a>"
        return code_cells.mask(is_in_bigframes, anchors)

    # No reference page is known for this prefix, so leave the names unlinked.
    return code_cells
277+
278+
279+
def generate_api_coverage(df, api_prefix):
    """Build the coverage table for the APIs under a single prefix.

    Args:
        df: DataFrame with at least the columns ``api``, ``is_in_bigframes``
            and ``missing_parameters``.
        api_prefix: Only rows whose ``api`` starts with ``"<api_prefix>."``
            are included.

    Returns:
        A DataFrame with the columns ``API`` (HTML-formatted name),
        ``Implemented`` ("Y", "P" or "N") and ``Missing parameters``.
    """
    selected = df.loc[df["api"].str.startswith(f"{api_prefix}.")]
    in_bigframes = selected["is_in_bigframes"]
    missing = selected["missing_parameters"]

    table = pd.DataFrame(
        {
            "API": format_api(selected["api"], in_bigframes, api_prefix),
            "Implemented": "",
            "Missing parameters": missing,
        }
    )

    # "Y": present with no missing parameters; "P": present but some parameters
    # are missing; "N": not present in bigframes at all.
    status_masks = (
        ((missing.str.len() == 0) & in_bigframes, "Y"),
        ((missing.str.len() != 0) & in_bigframes, "P"),
        (~in_bigframes, "N"),
    )
    for row_mask, status in status_masks:
        table.loc[row_mask, "Implemented"] = status
    return table
303+
304+
305+
def generate_api_coverage_doc(df, api_prefix):
    """Write the HTML coverage table for one API prefix into the docs tree.

    Only APIs that are at least partially implemented are listed. The table is
    written to ``docs/supported_pandas_apis/bf_<api_prefix>.html`` under
    REPO_ROOT.

    Args:
        df: DataFrame of API coverage rows, as accepted by
            ``generate_api_coverage``.
        api_prefix: Prefix identifying which group of APIs to document.
    """
    dataframe_table = generate_api_coverage(df, api_prefix)

    # Take an explicit copy of the filtered rows so the column assignment below
    # modifies an independent frame instead of a slice of the original table
    # (which makes pandas emit SettingWithCopyWarning).
    dataframe_table = dataframe_table.loc[dataframe_table["Implemented"] != "N"].copy()
    dataframe_table["Implemented"] = dataframe_table["Implemented"].map(
        {
            "Y": "<b>Y</b>",
            "P": "<i>P</i>",
        }
    )

    output_path = REPO_ROOT / "docs" / "supported_pandas_apis" / f"bf_{api_prefix}.html"
    # Pin the encoding so the generated file does not depend on the platform's
    # default locale.
    with open(output_path, "w", encoding="utf-8") as html_file:
        # escape=False keeps the HTML markup already embedded in the cells.
        dataframe_table.to_html(
            html_file, index=False, header=True, escape=False, border=0, col_space="8em"
        )
322+
323+
324+
def generate_api_coverage_docs(df):
    """Write one HTML coverage table per entry in PANDAS_TARGETS.

    Args:
        df: DataFrame of API coverage rows covering every target prefix.
    """
    # Only the prefix of each target is needed; the compared objects are ignored.
    for api_prefix, *_ in PANDAS_TARGETS:
        generate_api_coverage_doc(df, api_prefix)
328+
329+
330+
def print_api_coverage_summary(df, api_prefix):
    """Print the number of APIs per implementation status for one prefix.

    Args:
        df: DataFrame of API coverage rows.
        api_prefix: Prefix identifying which group of APIs to summarize.
    """
    coverage = generate_api_coverage(df, api_prefix)

    print(api_prefix)
    status_counts = coverage[["Implemented", "API"]].groupby(["Implemented"]).count()
    print(status_counts)
    print(f"{api_prefix} APIs: {coverage.shape[0]}\n")
336+
337+
338+
def print_api_coverage_summaries(df):
    """Print a coverage summary per target, followed by overall totals.

    Args:
        df: DataFrame with at least the columns ``is_in_bigframes`` and
            ``missing_parameters``, covering every prefix in PANDAS_TARGETS.
    """
    for api_prefix, *_ in PANDAS_TARGETS:
        print_api_coverage_summary(df, api_prefix)

    print(f"\nAll APIs: {len(df.index)}")

    in_bigframes = df["is_in_bigframes"]

    # Fully implemented: present in bigframes with no missing parameters.
    complete_count = ((df["missing_parameters"].str.len() == 0) & in_bigframes).sum()
    print(f"Y: {complete_count}")

    # Partially implemented: present in bigframes but missing some parameters.
    partial_count = ((df["missing_parameters"].str.len() != 0) & in_bigframes).sum()
    print(f"P: {partial_count}")

    absent_count = (~in_bigframes).sum()
    print(f"N: {absent_count}")
354+
355+
168356
def main():
    """Build the API coverage table and emit it in the requested form.

    The positional ``output_type`` argument selects the destination:

    * ``bigquery``: append the table to ``--bigquery_table_name``.
    * ``docs``: write the HTML tables used by the documentation build.
    * ``summary``: print per-target and overall counts.

    Any other value prints a message and exits with status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("output_type")
    parser.add_argument("--bigframes_version", default=bigframes.__version__)
    parser.add_argument("--release_version", default="")
    parser.add_argument("--bigquery_table_name")
    args = parser.parse_args()

    # Validate the arguments before the expensive table build so that a typo
    # fails immediately rather than after all the inspection work is done.
    if args.output_type not in ("bigquery", "docs", "summary"):
        print(f"Unexpected output_type {repr(args.output_type)}")
        sys.exit(1)
    if args.output_type == "bigquery" and not args.bigquery_table_name:
        parser.error("--bigquery_table_name is required for output_type 'bigquery'")

    df = build_api_coverage_table(args.bigframes_version, args.release_version)

    if args.output_type == "bigquery":
        df.to_gbq(args.bigquery_table_name, if_exists="append")
    elif args.output_type == "docs":
        generate_api_coverage_docs(df)
    else:
        print_api_coverage_summaries(df)
176374

177375

178376
if __name__ == "__main__":

scripts/test_publish_api_coverage.py

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def test_api_coverage_produces_expected_schema():
2727
"string",
2828
"boolean",
2929
"string",
30+
"string",
3031
"datetime64[ns]",
3132
"string",
3233
"string",
@@ -36,6 +37,7 @@ def test_api_coverage_produces_expected_schema():
3637
"pattern",
3738
"kind",
3839
"is_in_bigframes",
40+
"missing_parameters",
3941
"module",
4042
"timestamp",
4143
"bigframes_version",

0 commit comments

Comments
 (0)