forked from DonJayamanne/pythonVSCode
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathnormalizeSelection.py
310 lines (261 loc) · 12.4 KB
/
normalizeSelection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import ast
import json
import re
import sys
import textwrap
from typing import Iterable
# True when this script runs on Python 3.13 or newer. The flag is echoed back to
# the extension in the JSON reply under the "attach_bracket_paste" key.
# NOTE(review): presumably the extension uses it to decide whether to wrap the
# code in bracketed-paste markers for the 3.13 REPL - confirm on the TypeScript side.
attach_bracket_paste = sys.version_info >= (3, 13)
def split_lines(source):
    r"""Break ``source`` into lines using only the newlines the Python grammar knows.

    The Python grammar treats just ``\r``, ``\n`` and ``\r\n`` as line endings,
    whereas ``str.splitlines()`` on Python 3 also breaks on characters such as
    ``\v`` and ``\f``. Splitting on an explicit pattern gives the same result
    on every Python version.

    A run of consecutive newline characters counts as a single separator, so
    blank lines inside the text disappear. A leading or trailing newline still
    produces an empty string at that end of the returned list.
    """
    newline_run = re.compile(r"[\n\r]+")
    return newline_run.split(source)
def _get_statements(selection):
    """Yield each top-level statement of a selection as its own string.

    Blank lines are removed, the remaining text is dedented, and the result is
    parsed with ``ast.parse()``. The dedented text is then cut at the first
    line of every top-level node, so each yielded block runs from the start of
    one statement (decorators included) up to the line before the next one.
    Lines that precede the first statement (leading comments) are dropped;
    comment lines between two statements stay attached to the earlier one.

    A block that spans more than one line gets a trailing newline so that,
    once the blocks are joined, the REPL sees a blank line after it.

    This is a generator: the parsing, and therefore any ``SyntaxError`` raised
    by ``ast.parse()``, only happens once iteration starts.
    """
    # Remove blank lines within the selection to prevent the REPL from thinking
    # an indented block is finished.
    non_blank = (line for line in split_lines(selection) if line.strip() != "")

    # Dedent the selection and parse it using the ast module.
    source = textwrap.dedent("\n".join(non_blank))
    tree = ast.parse(source)

    # The dedented lines are what the blocks are rebuilt from.
    lines = split_lines(source)

    # 0-indexed first line of every top-level statement. A block ends where the
    # next one begins; the last block runs to the end of the text. Only start
    # positions are used, so this does not depend on `end_lineno`.
    starts = [_first_source_line(node) - 1 for node in tree.body]
    ends = starts[1:] + [len(lines)]

    for start, end in zip(starts, ends):
        block = "\n".join(lines[start:end])
        # If the block is multiline, add an extra newline character at its end.
        # This way, when joining blocks back together, there is a blank line
        # after each multiline statement and none between single-line ones,
        # which would otherwise show up as empty prompts:
        # >>> x = 22
        # >>>
        # >>> total = x + 30
        # >>>
        # For a multiline bracketed expression the newline is redundant, since
        # the closing bracket already terminates the statement.
        if end - start > 1:
            block += "\n"
        yield block


def _first_source_line(node):
    """Return the 1-indexed line on which ``node`` starts, decorators included."""
    # Not every AST node can carry decorators.
    decorators = getattr(node, "decorator_list", None)
    if decorators:
        # Since Python 3.8 `lineno` of a decorated def/class points at the
        # `def`/`class` line; before that it pointed at the first decorator.
        # Taking the first decorator's own line handles both, and stays correct
        # when a decorator spans several lines or comments sit between
        # decorators (counting decorators would assume one line each).
        return min(node.lineno, decorators[0].lineno)
    return node.lineno
def normalize_lines(selection):
    """Normalize the text selection received from the extension.

    The selection is split into its top-level statements, which are joined
    back with newlines and terminated with a newline so the REPL knows where
    each block ends. Single-line and multiline selections take the same path.

    Two trailing adjustments are then applied:

    * if the second-to-last character of the selection is ``}`` or ``]``
      (a selection ending in a dictionary or list followed by one more
      character), the last newline is dropped as unnecessary;
    * if the selection ends with ``}`` and contains ``return {``, an extra
      newline is appended to trigger execution.

    If anything goes wrong while parsing, the selection is returned as-is with
    a blank line appended to close whatever block it contains.
    """
    try:
        # Parse the selection into a list of top-level blocks.
        # `_get_statements` is lazy, so parse errors surface inside the join.
        statements = _get_statements(selection)
        # Insert a newline between each top-level statement, and append a newline to the selection.
        source = "\n".join(statements) + "\n"
        # If selection ends with trailing dictionary or list, remove last unnecessary newline.
        # The length guard stops a selection shorter than two characters from raising
        # IndexError here and being mistaken for a parse failure below.
        if len(selection) >= 2 and selection[-2] in ("}", "]"):
            source = source[:-1]
        # If the selection contains trailing return dictionary, insert newline to trigger execute.
        if check_end_with_return_dict(selection):
            source = source + "\n"
    except Exception:
        # Deliberate best effort: if there's a problem when parsing statements,
        # append a blank line to end the block and send it as-is.
        source = selection + "\n\n"
    return source
# Module-wide list of AST nodes. traverse_file() appends every top-level node of
# the parsed file, each followed by the statements of its body, and
# get_next_block_lineno() scans it to find the block that follows the one just run.
top_level_nodes = []
# Not referenced anywhere else in the visible part of this module.
# NOTE(review): looks unused - confirm before removing.
min_key = None
def check_end_with_return_dict(code):
    """Tell whether ``code`` ends with ``}`` and contains a ``return {`` somewhere.

    Surrounding whitespace is ignored. This is a plain textual check, not a
    syntactic one: the closing brace and the ``return {`` need not belong to
    the same expression.
    """
    trimmed = code.strip()
    if not trimmed.endswith("}"):
        return False
    return "return {" in trimmed
def check_exact_exist(top_level_nodes, start_line, end_line):
    """Return the nodes that span exactly ``start_line`` through ``end_line``.

    Every node in ``top_level_nodes`` whose ``lineno`` equals ``start_line``
    and whose ``end_lineno`` equals ``end_line`` is kept, in the order given.
    An empty list means nothing matched.
    """
    matches = []
    for candidate in top_level_nodes:
        if candidate.lineno != start_line:
            continue
        if candidate.end_lineno == end_line:
            matches.append(candidate)
    return matches
def traverse_file(whole_file_content, start_line, end_line, was_highlighted):  # noqa: ARG001
    """Pick the code that smart selection should send to the REPL.

    The whole file is parsed, and the statement(s) matching the 1-indexed
    ``start_line``..``end_line`` range are collected:

    * if one or more nodes (top-level statements or the direct body statements
      of a top-level compound statement) span exactly that range, their source
      is returned as-is, one per line;
    * otherwise every top-level node that contains the range is collected, for
      example the whole multiline dictionary when the cursor sits on one of its
      entries, and the result is passed through ``normalize_lines()``.

    ``was_highlighted`` is accepted for the caller's convenience and not used.

    Returns a dict with two keys: ``"normalized_smart_result"`` (the code to
    send, or the literal string ``"deprecated"`` when the file cannot be
    parsed) and ``"which_line_next"`` (the line of the next block to move the
    cursor to, ``0`` when the file cannot be parsed).

    Raises ``IndexError`` (from ``get_next_block_lineno()``) when no top-level
    node covers the requested range, e.g. the range lies on lines that belong
    to no statement.
    """
    try:
        parsed_file_content = ast.parse(whole_file_content)
    except Exception:
        # Handle case where user is attempting to run code where file contains deprecated Python code.
        # Let typescript side know and show warning message.
        return {
            "normalized_smart_result": "deprecated",
            "which_line_next": 0,
        }

    # The node list is module-level state shared with get_next_block_lineno().
    # Empty it first so nodes left over from a previous call (another file, or
    # an older version of this one) cannot leak into this lookup.
    top_level_nodes.clear()

    # Node types that expose a `body` attribute. For some of them (e.g. Lambda,
    # IfExp, Expression) `body` is a single node rather than a list, hence the
    # additional Iterable check below.
    ast_types_with_nodebody = (
        ast.Module,
        ast.Interactive,
        ast.Expression,
        ast.FunctionDef,
        ast.AsyncFunctionDef,
        ast.ClassDef,
        ast.For,
        ast.AsyncFor,
        ast.While,
        ast.If,
        ast.With,
        ast.AsyncWith,
        ast.Try,
        ast.Lambda,
        ast.IfExp,
        ast.ExceptHandler,
    )

    # Collect every top-level node, each followed by the statements of its body.
    # The nodes carry the start line, end line and source segment information
    # used to smartly select and send code.
    for node in ast.iter_child_nodes(parsed_file_content):
        top_level_nodes.append(node)
        if isinstance(node, ast_types_with_nodebody) and isinstance(node.body, Iterable):
            top_level_nodes.extend(node.body)

    # Just return the exact statement(s), if present.
    exact_nodes = check_exact_exist(top_level_nodes, start_line, end_line)
    if exact_nodes:
        smart_code = "".join(
            f"{ast.get_source_segment(whole_file_content, same_line_node)}\n"
            for same_line_node in exact_nodes
        )
        return {
            "normalized_smart_result": smart_code,
            "which_line_next": get_next_block_lineno(exact_nodes),
        }

    # No exact match: gather each top-level node that encloses the requested
    # range. An exact match on a top-level node cannot occur here, because all
    # top-level nodes were part of the exact-match lookup above.
    smart_code = ""
    should_run_top_blocks = []
    for top_node in ast.iter_child_nodes(parsed_file_content):
        if start_line >= top_node.lineno and end_line <= top_node.end_lineno:
            # For example, with the cursor on 'BMW': 'Germany' in
            #     my_dictionary = {
            #         'Audi': 'Germany',
            #         'BMW': 'Germany',
            #         'Genesis': 'Korea',
            #     }
            # all of the lines that pertain to my_dictionary are sent.
            should_run_top_blocks.append(top_node)
            smart_code += str(ast.get_source_segment(whole_file_content, top_node))
            smart_code += "\n"

    normalized_smart_result = normalize_lines(smart_code)
    which_line_next = get_next_block_lineno(should_run_top_blocks)
    return {
        "normalized_smart_result": normalized_smart_result,
        "which_line_next": which_line_next,
    }
# Starting from the last block that was run, locate the line of the block that
# comes next. The result is used to calculate the lineOffset by which the cursor
# is moved in VS Code.
def get_next_block_lineno(which_line_next):
    """Return the line number of the first collected node that follows the last run block.

    ``which_line_next`` is the list of nodes that were just sent; only its
    last element is consulted. The module-level ``top_level_nodes`` list is
    scanned in order for the first node starting after that element's
    ``end_lineno``. When there is no such node, the ``end_lineno`` itself is
    returned.

    Raises ``IndexError`` when ``which_line_next`` is empty.
    """
    last_ran_lineno = int(which_line_next[-1].end_lineno)
    later_starts = (
        candidate.lineno for candidate in top_level_nodes if candidate.lineno > last_ran_lineno
    )
    return next(later_starts, last_ran_lineno)
if __name__ == "__main__":
    # The extension pipes its request to stdin as a UTF-8 encoded JSON object;
    # read the raw bytes and decode them explicitly.
    stdin = sys.stdin if sys.version_info < (3,) else sys.stdin.buffer
    contents = json.loads(stdin.read().decode("utf-8"))

    # An empty highlight means the user has not explicitly selected any text.
    empty_highlight = contents.get("emptyHighlight", False)

    # The active editor's selection start and end lines come from the TypeScript
    # side. VS Code counts lines from 0, so shift both to 1-based numbering.
    vscode_start_line = contents["startLine"] + 1
    vscode_end_line = contents["endLine"] + 1

    if empty_highlight and contents.get("smartSendSettingsEnabled"):
        # Smart send: work out from the whole file which block to run.
        result = traverse_file(
            contents["wholeFileContent"],
            vscode_start_line,
            vscode_end_line,
            not empty_highlight,
        )
        normalized = result["normalized_smart_result"]
        payload = {"normalized": normalized}
        # The next-block line is only reported when the file could be parsed.
        if normalized != "deprecated":
            payload["nextBlockLineno"] = result["which_line_next"]
        payload["attach_bracket_paste"] = attach_bracket_paste
    else:
        # Regular send: normalize exactly the code that was handed over.
        payload = {
            "normalized": normalize_lines(contents["code"]),
            "attach_bracket_paste": attach_bracket_paste,
        }

    # Send the normalized code back to the extension as a JSON object.
    stdout = sys.stdout if sys.version_info < (3,) else sys.stdout.buffer
    stdout.write(json.dumps(payload).encode("utf-8"))
    stdout.close()