21
21
from onnxruntime import get_available_providers
22
22
from onnx_array_api .ext_test_case import example_path
23
23
from onnx_array_api .ort .ort_optimizers import ort_optimized_model
24
- from onnx_array_api .ort .ort_profile import ort_profile
24
+ from onnx_array_api .ort .ort_profile import ort_profile , merge_ort_profile
25
+ from onnx_array_api .plotting .stat_plot import plot_ort_profile
25
26
26
27
27
- filename = example_path ("data/small.onnx" )
28
+ suffix = ""
29
+ filename = example_path (f"data/small{ suffix } .onnx" )
28
30
optimized = filename + ".optimized.onnx"
31
+ print (f"model={ filename !r} " )
29
32
30
33
if not os .path .exists (optimized ):
31
34
ort_optimized_model (filename , output = optimized )
32
- print (optimized )
35
+ print (f" optimized= { optimized !r } " )
33
36
34
37
#############################
38
+ # .. _l-example-ort-profiling:
39
+ #
35
40
# Profiling
36
41
# +++++++++
37
42
43
48
disable_optimization = True ,
44
49
providers = ["CPUExecutionProvider" ],
45
50
)
46
- prof_base .to_excel ("prof_base.xlsx" , index = False )
51
+ prof_base .to_excel (f "prof_base{ suffix } .xlsx" , index = False )
47
52
prof_base
48
53
49
54
#######################################
50
55
# And the optimized model.
51
56
52
- prof_opt = ort_profile (
57
+ prof_opti = ort_profile (
53
58
optimized ,
54
59
feeds ,
55
60
repeat = 6 ,
56
61
disable_optimization = True ,
57
62
providers = ["CPUExecutionProvider" ],
58
63
)
59
- prof_opt
64
+ prof_opti .to_excel (f"prof_opti{ suffix } .xlsx" , index = False )
65
+ prof_opti
60
66
61
67
#######################################
62
68
# And the graph is:
63
69
64
-
65
- def plot_profile (df , ax0 , ax1 = None , title = None ):
66
- gr_dur = (
67
- df [["dur" , "args_op_name" ]].groupby ("args_op_name" ).sum ().sort_values ("dur" )
68
- )
69
- gr_dur .plot .barh (ax = ax0 )
70
- if title is not None :
71
- ax0 .set_title (title )
72
- if ax1 is not None :
73
- gr_n = (
74
- df [["dur" , "args_op_name" ]]
75
- .groupby ("args_op_name" )
76
- .count ()
77
- .sort_values ("dur" )
78
- )
79
- gr_n = gr_n .loc [gr_dur .index , :]
80
- gr_n .plot .barh (ax = ax1 )
81
- ax1 .set_title ("n occurences" )
82
-
83
-
84
70
unique_op = set (prof_base ["args_op_name" ])
85
71
fig , ax = plt .subplots (2 , 2 , figsize = (10 , len (unique_op )), sharex = "col" )
86
- plot_profile (prof_base , ax [0 , 0 ], ax [0 , 1 ], title = "baseline" )
87
- plot_profile ( prof_opt , ax [1 , 0 ], ax [1 , 1 ], title = "optimized" )
88
-
89
- fig .savefig ("plot_profiling.png" )
72
+ plot_ort_profile (prof_base , ax [0 , 0 ], ax [0 , 1 ], title = "baseline" )
73
+ plot_ort_profile ( prof_opti , ax [1 , 0 ], ax [1 , 1 ], title = "optimized" )
74
+ fig . tight_layout ()
75
+ fig .savefig (f "plot_profiling{ suffix } .png" )
90
76
91
77
##################################################
92
78
# Merging profiles
@@ -96,103 +82,14 @@ def plot_profile(df, ax0, ax1=None, title=None):
96
82
# process the same image and the input and output size are the
97
83
# same at every iteration.
98
84
99
-
100
- def preprocess (df ):
101
- groupkey = [
102
- "args_op_name" ,
103
- "args_output_type_shape" ,
104
- "args_input_type_shape" ,
105
- "args_provider" ,
106
- ]
107
-
108
- def _idx (row ):
109
- """
110
- There may be multiple node with the same
111
- input/output types and shapes.
112
- This function gives every instance a distinct id.
113
- First unique op with same I/O receives the index 0.
114
- The counter restart when the session goes to the
115
- next image.
116
- """
117
- if row ["cat" ] == "Session" :
118
- occurences [0 ] = {}
119
- return - 1
120
- assert "idx" not in groupkey
121
- vals = [row [k ] for k in groupkey ]
122
- key = tuple (map (str , vals ))
123
- if key not in occurences [0 ]:
124
- occurences [0 ][key ] = 0
125
- else :
126
- occurences [0 ][key ] += 1
127
- return occurences [0 ][key ]
128
-
129
- df = df .copy ()
130
- occurences = [{}]
131
- df ["idx" ] = df .apply (_idx , axis = 1 )
132
- df = df [(df ["cat" ] == "Node" ) & df ["name" ].str .contains ("kernel_time" )]
133
- groupkey .append ("idx" )
134
- for c in groupkey :
135
- if c != "idx" :
136
- df [c ] = df [c ].apply (str )
137
- gr = df [groupkey + ["dur" ]].groupby (groupkey )
138
- return gr .sum ()
139
-
140
-
141
- base = preprocess (prof_base )
142
- opti = preprocess (prof_opt )
143
- merge = base .merge (
144
- opti , how = "outer" , suffixes = ("base" , "opti" ), left_index = True , right_index = True
145
- )
146
- merge = merge .reset_index (drop = False )
147
- merge .to_excel ("plot_profiling_merged.xlsx" , index = False )
85
+ merge , gr = merge_ort_profile (prof_base , prof_opti )
86
+ merge .to_excel (f"plot_profiling_merged{ suffix } .xlsx" , index = False )
148
87
merge
149
88
150
-
151
89
#####################################################
152
- # Aggregation
153
-
154
-
155
- def classify (row ):
156
- if numpy .isnan (row ["duropti" ]):
157
- return "-"
158
- if numpy .isnan (row ["durbase" ]):
159
- return "+"
160
- return "="
90
+ # More detailed
161
91
162
-
163
- keys = {"float" : "f" }
164
-
165
-
166
- def process_shape (s ):
167
- value = eval (s )
168
- ns = []
169
- for v in value :
170
- if len (v ) != 1 :
171
- raise NotImplementedError (f"Unexpected value { v } in { s !r} ." )
172
- k , v = list (v .items ())[0 ]
173
- n = "-" .join ([keys [k ], "x" .join (map (str , v ))])
174
- ns .append (n )
175
- return "," .join (ns )
176
-
177
-
178
- def label (row ):
179
- name = row ["args_op_name" ]
180
- inshape = process_shape (row ["args_input_type_shape" ])
181
- outshape = process_shape (row ["args_output_type_shape" ])
182
- side = row ["side" ][0 ]
183
- prov = row ["args_provider" ][:3 ]
184
- idx = row ["idx" ]
185
- return f"[{ side } { prov } ]{ name } ({ inshape } )->{ outshape } [{ idx } ]"
186
-
187
-
188
- df = merge .copy ()
189
- df ["side" ] = df .apply (classify , axis = 1 )
190
- df ["label" ] = df .apply (label , axis = 1 )
191
- gr = (
192
- df [["label" , "durbase" , "duropti" , "idx" ]]
193
- .groupby ("label" )
194
- .agg ({"durbase" : numpy .sum , "duropti" : numpy .sum , "idx" : max })
195
- )
92
+ gr .to_excel (f"plot_profiling_merged_details{ suffix } .xlsx" , index = False )
196
93
gr
197
94
198
95
################################
@@ -210,11 +107,10 @@ def label(row):
210
107
gr [["durbase" , "duropti" ]].plot .barh (ax = ax [0 ])
211
108
ax [0 ].set_title ("Side by side duration" )
212
109
gr = gr .copy ()
213
- gr ["idx" ] += 1
214
- gr [["idx" ]].plot .barh (ax = ax [1 ])
110
+ gr [["countbase" , "countopti" ]].plot .barh (ax = ax [1 ])
215
111
ax [1 ].set_title ("Side by side count" )
216
112
fig .tight_layout ()
217
- fig .savefig ("plot_profiling_side_by_side.png" )
113
+ fig .savefig (f "plot_profiling_side_by_side{ suffix } .png" )
218
114
219
115
220
116
########################################
@@ -231,21 +127,44 @@ def label(row):
231
127
disable_optimization = True ,
232
128
providers = ["CUDAExecutionProvider" ],
233
129
)
130
+ prof_base .to_excel (f"prof_cuda_base{ suffix } .xlsx" , index = False )
131
+
234
132
prof_opti = ort_profile (
235
133
optimized ,
236
134
feeds ,
237
135
repeat = 6 ,
238
136
disable_optimization = True ,
239
- providers = ["CUDAExecutionProvider" ],
137
+ providers = ["CUDAExecutionProvider" , "CPUExecutionProvider" ],
240
138
)
139
+ prof_opti .to_excel (f"prof_cuda_opti{ suffix } .xlsx" , index = False )
241
140
242
141
unique_op = set (prof_base ["args_op_name" ])
243
142
fig , ax = plt .subplots (2 , 2 , figsize = (10 , len (unique_op )), sharex = "col" )
244
- plot_profile (prof_base , ax [0 , 0 ], ax [0 , 1 ], title = "baseline" )
245
- plot_profile (prof_opt , ax [1 , 0 ], ax [1 , 1 ], title = "optimized" )
246
- fig .savefig ("plot_profiling_cuda.png" )
143
+ plot_ort_profile (prof_base , ax [0 , 0 ], ax [0 , 1 ], title = "baseline" )
144
+ plot_ort_profile (prof_opti , ax [1 , 0 ], ax [1 , 1 ], title = "optimized" )
145
+ fig .tight_layout ()
146
+ fig .savefig (f"plot_profiling_cuda{ suffix } .png" )
147
+
148
+ merge , gr = merge_ort_profile (prof_base , prof_opti )
149
+ merge .to_excel (f"plot_profiling_merged{ suffix } .xlsx" , index = False )
150
+ gr .to_excel (f"plot_profiling_merged_details{ suffix } .xlsx" , index = False )
151
+
152
+ grmax = gr ["durbase" ] + gr ["duropti" ]
153
+ total = grmax .sum ()
154
+ grmax /= total
155
+ gr = gr [grmax >= 0.01 ]
156
+
157
+ fig , ax = plt .subplots (1 , 2 , figsize = (14 , min (gr .shape [0 ], 500 )), sharey = True )
158
+ gr [["durbase" , "duropti" ]].plot .barh (ax = ax [0 ])
159
+ ax [0 ].set_title ("Side by side duration" )
160
+ gr = gr .copy ()
161
+ gr [["countbase" , "countopti" ]].plot .barh (ax = ax [1 ])
162
+ ax [1 ].set_title ("Side by side count" )
163
+ fig .tight_layout ()
164
+ fig .savefig (f"plot_profiling_side_by_side_cuda{ suffix } .png" )
165
+
247
166
else :
248
- print (f"CUDA not available in { get_available_providers ()} " )
167
+ print (f"CUDA not available in { get_available_providers ()} . " )
249
168
fig , ax = None , None
250
169
251
170
ax
0 commit comments