-
Notifications
You must be signed in to change notification settings - Fork 28.5k
/
Copy pathbuild-error-docs.py
152 lines (138 loc) · 5.28 KB
/
build-error-docs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
Generate a unified page of documentation for all error conditions.
"""
import json
import os
import re
from itertools import chain
from pathlib import Path
from textwrap import dedent
from typing import Optional

# To avoid adding new direct dependencies, we import from within mkdocs.
# This is not ideal as unrelated updates to mkdocs may break this script.
from mkdocs.structure.pages import markdown
# Directory holding this script; the project root is two levels above it.
THIS_DIR = Path(__file__).parent
SPARK_PROJECT_ROOT = THIS_DIR.parents[1]
# Docs tree and the JSON file listing the error conditions, both relative to the project root.
DOCS_ROOT = SPARK_PROJECT_ROOT.joinpath("docs")
ERROR_CONDITIONS_PATH = SPARK_PROJECT_ROOT.joinpath(
    "common", "utils", "src", "main", "resources", "error", "error-conditions.json"
)
def assemble_message(message_parts):
    """
    Join the fragments of an error message and render the result as HTML.

    The fragments are joined with single spaces, every `<...>` span (matched non-greedily)
    is wrapped in backticks, and the resulting text is passed through the Markdown renderer.
    """
    joined = " ".join(message_parts)
    # Wrap each `<...>` span in backticks before the text goes to the Markdown renderer.
    backticked = re.sub(r"(<.*?>)", r"`\1`", joined)
    return markdown.markdown(backticked)
def load_error_conditions(path):
    """
    Load the error conditions JSON file and render every message to HTML.

    Conditions whose name starts with `_LEGACY_ERROR` or `INTERNAL_ERROR` are left out.
    For each remaining condition, the `message` of the condition itself and of every entry
    under `subClass` (if present) is replaced by the output of `assemble_message`.

    Returns a dict mapping each kept condition name to its updated details.
    """
    # JSON is UTF-8 by specification, so don't depend on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        raw_error_conditions = json.load(f)
    error_conditions = {}
    for name, details in raw_error_conditions.items():
        if name.startswith(("_LEGACY_ERROR", "INTERNAL_ERROR")):
            continue
        if "subClass" in details:
            for sub_details in details["subClass"].values():
                sub_details["message"] = assemble_message(sub_details["message"])
        details["message"] = assemble_message(details["message"])
        error_conditions[name] = details
    return error_conditions
def anchor_name(condition_name: str, sub_condition_name: Optional[str] = None) -> str:
    """
    Build the HTML anchor for an error condition or one of its sub-conditions.

    The non-empty names are joined with "-", lowercased, and have every "_" replaced
    by "-".

    URLs can, in practice, be up to 2,000 characters long without causing any issues. So we
    preserve the condition name mostly as-is for use in the anchor, even when that name is
    very long.
    See: https://stackoverflow.com/a/417184
    """
    parts = [part for part in (condition_name, sub_condition_name) if part]
    return "-".join(parts).lower().replace("_", "-")
def generate_doc_rows(condition_name, condition_details):
    """
    Render one error condition as HTML table rows.

    Returns a list of `<tr>` strings: the row for the condition itself, followed by one row
    per entry under `subClass` (if present), ordered by sub-condition name.
    """
    # Inserting `<wbr />` before each underscore gives soft break opportunities, so that a
    # long name that needs to be wrapped will wrap in a visually pleasing manner.
    # See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/wbr
    def breakable(name):
        return name.replace("_", "<wbr />_")

    condition_template = """
        <tr id="{anchor}">
        <td>{sql_state}</td>
        <td>
        <span class="error-condition-name">
        <code>
        <a href="#{anchor}">#</a>
        </code>
        {condition_name}
        </span>
        </td>
        <td>{message}</td>
        </tr>
        """
    sub_condition_template = """
        <tr id="{anchor}">
        <td></td>
        <td class="error-sub-condition">
        <span class="error-condition-name">
        <code>
        <a href="#{anchor}">#</a>
        </code>
        {sub_condition_name}
        </span>
        </td>
        <td class="error-sub-condition">{message}</td>
        </tr>
        """

    rows = [
        condition_template.format(
            anchor=anchor_name(condition_name),
            sql_state=condition_details["sqlState"],
            condition_name=breakable(condition_name),
            message=condition_details["message"],
        )
    ]
    sub_conditions = (
        condition_details["subClass"] if "subClass" in condition_details else {}
    )
    for sub_name in sorted(sub_conditions):
        rows.append(
            sub_condition_template.format(
                anchor=anchor_name(condition_name, sub_name),
                sub_condition_name=breakable(sub_name),
                message=sub_conditions[sub_name]["message"],
            )
        )
    # Dedent and trim only after the values have been interpolated.
    return [dedent(row).strip() for row in rows]
def generate_doc_table(error_conditions):
    """
    Render all error conditions as a single HTML table.

    Conditions are ordered by their `sqlState` and then by name; each one contributes the
    rows produced by `generate_doc_rows`, placed after a fixed header row.

    Returns the table's HTML as a string.
    """
    ordered_conditions = sorted(
        error_conditions.items(),
        key=lambda item: (item[1]["sqlState"], item[0]),
    )
    # A generator is enough here: the rows are consumed exactly once by the join below.
    doc_rows = chain.from_iterable(
        generate_doc_rows(condition_name, condition_details)
        for condition_name, condition_details in ordered_conditions
    )
    table_html = """
        <table id="error-conditions">
        <tr>
        <th>Error State / SQLSTATE</th>
        <th>Error Condition & Sub-Condition</th>
        <th>Message</th>
        </tr>
        {rows}
        </table>
        """
    # We dedent the template before interpolating so that the rows (which are not
    # indented) don't prevent the dedent from working.
    return dedent(table_html).strip().format(rows="\n".join(doc_rows))
if __name__ == "__main__":
    # Build the error conditions table and write it to
    # DOCS_ROOT/_generated/error-conditions.html, creating the directory if needed.
    error_conditions = load_error_conditions(ERROR_CONDITIONS_PATH)
    doc_table = generate_doc_table(error_conditions)
    generated_dir = DOCS_ROOT / "_generated"
    generated_dir.mkdir(exist_ok=True)
    html_table_path = generated_dir / "error-conditions.html"
    # Write as UTF-8 explicitly so the output does not depend on the platform's default
    # encoding.
    with open(html_table_path, "w", encoding="utf-8") as f:
        f.write(doc_table)
    print("Generated:", os.path.relpath(html_table_path, start=SPARK_PROJECT_ROOT))