-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_optimization.py
128 lines (97 loc) · 3.45 KB
/
plot_optimization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
.. _l-onnx-array-onnxruntime-optimization:
Optimization with onnxruntime
=============================
*onnxruntime* optimizes the onnx graph by default before running
the inference. It modifies, fuses or adds new operators.
Some of them are standard onnx operators, some of them
are implemented in onnxruntime (see `Supported Operators
<https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md>`_).
This example looks into the differences of two models.
Optimize a model with onnxruntime
+++++++++++++++++++++++++++++++++
"""
import os
from pprint import pprint
import numpy
from pandas import DataFrame
import matplotlib.pyplot as plt
from onnx import load
from onnx_array_api.ext_test_case import example_path
from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
from onnx_array_api.validation.diff import text_diff, html_diff
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions
from onnx_array_api.ext_test_case import measure_time
from onnx_array_api.ort.ort_optimizers import ort_optimized_model
# Path of the example model, and of the file that receives its optimized
# version (same path with an ".optimized.onnx" suffix appended).
filename = example_path("data/small.onnx")
optimized = filename + ".optimized.onnx"

# ort_optimized_model is only called when the output file is missing, so
# re-running the example reuses the file produced by a previous run.
already_optimized = os.path.exists(optimized)
if not already_optimized:
    ort_optimized_model(filename, output=optimized)

print(optimized)
#############################
# Output comparison
# +++++++++++++++++

# Both sessions are created with the same options, with every graph
# optimization enabled.
so = SessionOptions()
so.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

# Random float32 input. NOTE(review): the (1, 3, 112, 112) shape is assumed
# to be what data/small.onnx expects -- confirm against the model.
img = numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)

sess = InferenceSession(filename, so, providers=["CPUExecutionProvider"])
sess_opt = InferenceSession(optimized, so, providers=["CPUExecutionProvider"])

# The name of the first input of the original model is used to feed both
# sessions; only the first output of each run is compared.
input_name = sess.get_inputs()[0].name
out = sess.run(None, {input_name: img})[0]
out_opt = sess_opt.run(None, {input_name: img})[0]

if out.shape != out_opt.shape:
    # The message needs the f prefix, otherwise the placeholders are printed
    # verbatim instead of the two shapes.
    print(f"ERROR shape are different {out.shape} != {out_opt.shape}")

# Largest absolute element-wise discrepancy between the two outputs.
diff = numpy.abs(out - out_opt).max()
print(f"Differences: {diff}")
####################################
# Difference
# ++++++++++
#
# Unoptimized model.

# Load the original model from disk and render it as plain text.
with open(filename, "rb") as stream:
    model = load(stream)

print("first model to text...")
text1 = onnx_simple_text_plot(model, indent=False)
print(text1)

#####################################
# Optimized model.

# Same steps for the optimized file; `model` is rebound to the second graph.
with open(optimized, "rb") as stream:
    model = load(stream)

print("second model to text...")
text2 = onnx_simple_text_plot(model, indent=False)
print(text2)
########################################
# Differences

print("differences...")
# Textual diff between the two renderings computed above.
diff_text = text_diff(text1, text2)
print(diff_text)

#####################################
# HTML version.

print("html differences...")
output = html_diff(text1, text2)

# The HTML report is written to the current working directory.
with open("diff_html.html", "w", encoding="utf-8") as html_file:
    html_file.write(output)

print("done.")
#####################################
# Benchmark
# +++++++++

# A fresh random input is drawn for the timing runs.
img = numpy.random.random((1, 3, 112, 112)).astype(numpy.float32)


def _run_original():
    """Run one inference with the session built on the original model."""
    return sess.run(None, {input_name: img})


def _run_optimized():
    """Run one inference with the session built on the optimized model."""
    return sess_opt.run(None, {input_name: img})


# Each measure is tagged with a name, later used as the index of the
# results table.
t1 = measure_time(_run_original, repeat=25, number=25)
t1["name"] = "original"
print("Original model")
pprint(t1)

t2 = measure_time(_run_optimized, repeat=25, number=25)
t2["name"] = "optimized"
print("Optimized")
pprint(t2)
############################
# Plots
# +++++

fig, ax = plt.subplots(1, 1, figsize=(12, 4))

# One row per benchmarked model, indexed by the name set on each measure.
df = DataFrame([t1, t2]).set_index("name")
df

#######################################
# And the graph is:

# Bar height is the "average" column, the error bar is the "deviation" column.
averages = df["average"].values
deviations = df["deviation"].values
ax.bar(df.index, averages, yerr=deviations, capsize=6)
ax.set_title("Measure performance of optimized model\nlower is better")
plt.grid()
fig.savefig("plot_optimization.png")