Caffe2 - Caffemodel 转换为 Caffe2 pb 模型
1. 单输入单输出 - caffe_translator.py
Caffe2 提供了将 caffemodel 转换为 caffe2 模型的工具——caffe_translator.py.
其用法如下:
python -m caffe2.python.caffe_translator deploy.prototxt pretrained.caffemodel
即得到 caffe2 的模型: init_net.pb 和 predict_net.pb.
现在支持转换的网络层有:
- Input
- VideoData
- Data
- Convolution3D
- Convolution
- Deconvolution
- Crop
- ReLU
- Pooling
- Pooling3D
- LRN
- InnerProduct
- Dropout
- Softmax
- SoftmaxWithLoss
- Accuracy
- Concat
- TanH
- InstanceNorm
- BatchNorm
- Eltwise
- Scale
- Reshape
- Sigmoid
- ROIPooling
- PReLU
- Reduction
2. 多输入多输出
Caffe2 提供的模型转换工具似乎只支持单输入单输出。由于自己的网络模型有多个输入和多个输出,因此基于 caffe_translator.py 进行修改,得到新的 caffe_translator_multi.py:
# ----------------------------------
# caffe_translator_multi.py
# multi-inputs, multi-outputs
# ----------------------------------
#!/usr/bin/env python
import argparse
import copy
import logging
import re
import numpy as np # noqa
import sys
sys.path.insert(0, '/path/to/caffe2/build')
from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
from caffe.proto import caffe_pb2
from caffe2.python import core, utils, workspace
from google.protobuf import text_format
# Configure the root logger with the default handler, then create the
# module-level logger named "caffe_translator" and set it to INFO level.
logging.basicConfig()
log = logging.getLogger("caffe_translator")
log.setLevel(logging.INFO)
def _StateMeetsRule(state, rule):
    """Reproduce Caffe's StateMeetsRule check.

    Args:
        state: object exposing ``phase``, ``level`` and an iterable ``stage``.
        rule: object exposing ``HasField(name)``, ``phase``, ``min_level``,
            ``max_level`` and the iterables ``stage`` and ``not_stage``.

    Returns:
        True when ``state`` satisfies every constraint that ``rule`` sets,
        False as soon as one constraint is violated.
    """
    # Scalar constraints only apply when the rule explicitly sets the field.
    if rule.HasField('phase') and rule.phase != state.phase:
        return False
    if rule.HasField('min_level') and state.level < rule.min_level:
        return False
    if rule.HasField('max_level') and state.level > rule.max_level:
        return False

    curr_stages = set(state.stage)
    # Every stage listed in rule.stage must be present in the state.
    if any(s not in curr_stages for s in rule.stage):
        return False
    # No stage listed in rule.not_stage may be present in the state.
    if any(s in curr_stages for s in rule.not_stage):
        return False
    # No constraint was violated.
    return True
def _ShouldInclude(net_state, layer):
    """Reproduce Caffe's layer inclusion and exclusion rule.

    Args:
        net_state: state object passed through to ``_StateMeetsRule``.
        layer: object exposing the rule sequences ``include`` and ``exclude``.

    Returns:
        When ``layer.include`` is non-empty: True iff at least one include
        rule is met (exclude rules do not affect the result in that case).
        Otherwise: True iff no exclude rule is met.
    """
    if len(layer.include):
        # With include rules present, a single matching rule is enough.
        return any(
            _StateMeetsRule(net_state, rule) for rule in layer.include)
    # Without include rules the layer is kept unless an exclusion matches.
    return not any(
        _StateMeetsRule(net_state, rule) for rule in layer.exclude)
def _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops):
    """Run the net op by op and record output shapes of the selected ops.

    Args:
        net: net definition whose ``op`` sequence is executed in order.
        net_params: container whose ``protos`` are fed as parameter blobs.
        dummy_input: array fed into the first input of the first op.
        legacy_pad_ops: indices into ``net.op`` whose output shapes are wanted.

    Returns:
        Dict mapping each op index in ``legacy_pad_ops`` to the shape of that
        op's first output blob.
    """
    # Membership is tested once per op, so use a set for the lookup.
    legacy_indices = set(legacy_pad_ops)

    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
            .feed(utils.Caffe2TensorToNumpyArray(param))

    # NOTE(review): only the first input of the first op is fed here; a net
    # with several external inputs presumably needs more feeds -- confirm.
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)

    # Get dimensions with legacy pad
    dim_map = {}
    for i, op_def in enumerate(net.op):
        ws._run_operator(op_def.SerializeToString())
        if i in legacy_indices:
            dim_map[i] = ws.fetch_blob(op_def.output[0]).shape
    return dim_map
def _GetLegacyPadArgs(op_def, arg_map):
    """Collect the four per-side pad values of an operator.

    Args:
        op_def: operator definition (not read by this function).
        arg_map: dict mapping argument name to an object with an integer
            attribute ``i``.

    Returns:
        Dict with keys ``pad_l``, ``pad_t``, ``pad_r``, ``pad_b``. When
        ``arg_map`` has a ``'pad'`` entry, its value is used for all four
        sides; otherwise each side is read from its own entry.

    Raises:
        KeyError: if ``'pad'`` is absent and any per-side entry is missing.
    """
    keys = ('pad_l', 'pad_t', 'pad_r', 'pad_b')
    if 'pad' in arg_map:
        uniform_pad = arg_map['pad'].i
        return {k: uniform_pad for k in keys}
    return {k: arg_map[k].i for k in keys}
def _AdjustDims(op_def, arg_map, pads, dim1, dim2):
    """Grow bottom/right padding so that ``dim2`` would match ``dim1``.

    Args:
        op_def: operator definition whose ``arg`` list is rewritten when the
            operator carries a single ``'pad'`` argument.
        arg_map: dict mapping argument name to the operator's argument
            objects (each with an integer attribute ``i``).
        pads: dict with keys ``pad_l``, ``pad_t``, ``pad_r``, ``pad_b``;
            updated in place.
        dim1: 4-tuple ``(n, c, h, w)`` to be matched.
        dim2: 4-tuple ``(n, c, h, w)`` currently produced.

    Raises:
        AssertionError: if the first two dimensions differ.
        Exception: if height or width differ by anything other than
            ``dim1`` being exactly one larger.
    """
    n1, c1, h1, w1 = dim1
    n2, c2, h2, w2 = dim2
    assert(n1 == n2)
    assert(c1 == c2)

    # Nothing to adjust when the spatial dimensions already agree.
    if h1 == h2 and w1 == w2:
        return

    # Only an off-by-one difference can be absorbed by one more pad row/col.
    if h1 == h2 + 1:
        pads['pad_b'] += 1
    elif h1 != h2:
        raise Exception("Unexpected dimensions for height:", h1, h2)
    if w1 == w2 + 1:
        pads['pad_r'] += 1
    elif w1 != w2:
        raise Exception("Unexpected dimensions for width:", w1, w2)

    if 'pad' in arg_map:
        # A single uniform 'pad' cannot express asymmetric padding, so it is
        # replaced by the four explicit per-side arguments.
        op_def.arg.remove(arg_map['pad'])
        new_args = []
        for name, value in pads.items():
            arg = caffe2_pb2.Argument()
            arg.name = name
            arg.i = value
            new_args.append(arg)
        op_def.arg.extend(new_args)
    else:
        # Per-side arguments already exist: write the new values into them.
        for name, value in pads.items():
            arg_map[name].i = value
def _RemoveLegacyPad(net, net_params, input_dims):
legacy_pad_ops = []
for i in range(len(net.op)):
op_def = net.op[i]
if re.match(r'^(Conv|ConvTranspose|MaxPool|AveragePool)(\dD)?$',
op_def.type):
for arg in op_def.arg:
if arg.name == 'legacy_pad':
legacy_pad_ops.append(i)
break
if legacy_pad_ops:
n, c, h, w = input_dims
dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
dim_map = _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops)
# Running with the legacy pad argument removed
# compare the dimensions and adjust pad argument when necessary
ws = workspace.C.Workspace()
external_input = net.op[0].input[0]
ws.create_blob(external_input).feed_blob(dummy_input)
for param in net_params.protos:
ws.create_blob(param.name) \
.feed_blob(utils.Caffe2TensorToNumpyArray(param))
for i in range(len(net.op)):
op_def = net.op[i]
if i in legacy_pad_ops:
arg_map = {}
for arg in op_def.arg:
arg_map[arg.name] = arg
pads = _GetLegacyPadArgs(op_def, arg_map)
# remove legacy pad arg
for j in range(len(op_def.arg)):
arg = op_def.arg[j]
if arg.name == 'legacy_pad':
del op_def.arg[j]
break
output = op_def.output[0]
# use a new name to avoid the interference with inplace
nonlegacy_output = output + '_nonlegacy'
op_def.output[0] = nonlegacy_output
ws._run_operator(op_def.SerializeToString())
blob_nonlegacy = ws.fetch_blob(nonlegacy_output)
# reset output name
op_def.output[0] = output
dim1 = dim_map[i]
dim2 = blob_nonlegacy.shape
_AdjustDims(op_def, arg_map, pads, dim1, dim2)
w