export_example.py
# pyre-ignore-all-errors
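"""Export an example model to an ExecuTorch program for the Qualcomm QNN backend.

The script quantizes the selected model with QnnQuantizer via the PT2E
(prepare_pt2e / convert_pt2e) flow, lowers it to the QNN backend for the
SM8550 SoC, optionally generates an ETRecord for profiling, and saves the
resulting .pte program.
"""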
import argparse
import copy

import torch

from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.utils.utils import (
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
    to_edge_transform_and_lower_to_qnn,
)
from executorch.devtools import generate_etrecord
from executorch.examples.models import MODEL_NAME_TO_MODEL
from executorch.examples.models.model_factory import EagerModelFactory
from executorch.exir.capture._config import ExecutorchBackendConfig
from executorch.extension.export_util.utils import save_pte_program

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


def main() -> None:
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-m",
        "--model_name",
        required=True,
        help=f"provide a model name. Valid ones: {list(MODEL_NAME_TO_MODEL.keys())}",
    )

    parser.add_argument(
        "-g",
        "--generate_etrecord",
        action="store_true",
        required=True,
        help="Generate ETRecord metadata to link with runtime results (used for profiling)",
    )

    parser.add_argument(
        "-f",
        "--output_folder",
        type=str,
        default="",
        help="The folder to store the exported program",
    )

    args = parser.parse_args()
    if args.model_name not in MODEL_NAME_TO_MODEL:
        raise RuntimeError(
            f"Model {args.model_name} is not a valid name. "
            f"Available models are {list(MODEL_NAME_TO_MODEL.keys())}."
        )

    model, example_inputs, _, _ = EagerModelFactory.create_model(
        *MODEL_NAME_TO_MODEL[args.model_name]
    )
    # Get quantizer
    quantizer = QnnQuantizer()

    # Typical PyTorch 2.0 (PT2E) quantization flow
    m = torch.export.export(model.eval(), example_inputs, strict=True).module()
    m = prepare_pt2e(m, quantizer)
    # Calibration: a single forward pass over the example inputs populates the observers
    m(*example_inputs)
    # Get the quantized model
    m = convert_pt2e(m)
    # Capture program for edge IR and delegate to QNN backend
    backend_options = generate_htp_compiler_spec(
        use_fp16=False,
    )
    compile_spec = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8550,
        backend_options=backend_options,
    )
    delegated_program = to_edge_transform_and_lower_to_qnn(
        m, example_inputs, compile_spec
    )

    # A deep copy is needed for the ETRecord, as lowering modifies the graph in place.
    edge_copy = copy.deepcopy(delegated_program)

    executorch_program = delegated_program.to_executorch(
        config=ExecutorchBackendConfig(extract_delegate_segments=False)
    )
    if args.generate_etrecord:
        etrecord_path = args.output_folder + "etrecord.bin"
        generate_etrecord(etrecord_path, edge_copy, executorch_program)

    save_pte_program(executorch_program, args.model_name, args.output_folder)


if __name__ == "__main__":
    main()  # pragma: no cover
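

# Example invocation (a sketch, not part of the original file; assumes "mv2" is a
# registered key in MODEL_NAME_TO_MODEL):
#
#   python export_example.py -m mv2 -g -f ./output/
#
# Note that the trailing slash on --output_folder matters for the ETRecord path,
# since etrecord_path is built by plain string concatenation above.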