TVM编译模型的relay ir有两种触发方式,一种是调用build接口,例如:
# Compile the Relay function for the "llvm" target inside a PassContext.
with tvm.transform.PassContext(opt_level=10):
    lib = relay.build(func, "llvm", params=params)
这个build接口的流程在【从零开始学深度学习编译器】六,TVM的编译流程详解 - 知乎中有详细的分析。
一种是调用evaluate接口,这种是编译和推理一起,例如前面我们的测试例子:
target = "llvm"

# Import the ONNX model into Relay, keyed by the graph's input name and shape.
input_name = "Input3"
shape_dict = {input_name: x.shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# Create a graph executor for the imported module on CPU 0.
with tvm.transform.PassContext(opt_level=1):
    intrp = relay.build_module.create_executor("graph", mod, tvm.cpu(0), target)

# evaluate() returns a callable; calling it with the input and params runs inference.
dtype = "float32"
tvm_output = intrp.evaluate()(tvm.nd.array(x.astype(dtype)), **params).asnumpy()
tvm.transform.PassContext(opt_level=1)是设置当前tvm优化等级为1,tvm模型优化暂时不讨论。relay.build_module.create_executor是调用python/tvm/relay/build_module.py的create_executor函数:
def create_executor(kind="debug", mod=None, device=None, target="llvm", params=None):
    """Factory function to create an executor.

    Parameters
    ----------
    kind : str
        The type of executor. Available options are `debug` for the
        interpreter, `graph` for the graph executor, and `vm` for the virtual
        machine.

    mod : :py:class:`~tvm.IRModule`
        The Relay module containing collection of functions

    device : :py:class:`Device`
        The device to execute the code.

    target : :py:class:`tvm.Target`
        The corresponding context

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time.

    Returns
    -------
    executor : :py:class:`~tvm.relay.backend.interpreter.Executor`

    Raises
    ------
    RuntimeError
        If `kind` is not one of `debug`, `graph` or `vm`.
    """
    if mod is None:
        mod = IRModule()

    # A caller-supplied device must match the device type derived from the
    # target; when none is supplied, derive the device from the target.
    if device is not None:
        assert device.device_type == _nd.device(str(target), 0).device_type
    else:
        device = _nd.device(str(target), 0)

    # Bind the given params into "main" and rebuild the module from it.
    if params is not None:
        mod = IRModule.from_expr(bind_params_by_name(mod["main"], params))

    # Accept a target string for convenience and turn it into a Target.
    if isinstance(target, str):
        target = Target(target)

    if kind == "debug":
        return _interpreter.Interpreter(mod, device, target)
    if kind == "graph":
        return GraphExecutor(mod, device, target)
    if kind == "vm":
        return VMExecutor(mod, device, target)
    raise RuntimeError("unknown execution strategy: {0}".format(kind))
create_executor的第三个参数device是Device类型,我们传入的是tvm.cpu(0)。看下python/tvm下的__init__.py文件,有:
from .runtime.ndarray import device, cpu, cuda, gpu, opencl, cl, vulkan, metal, mtl
所以这个tvm.cpu(0)的接口应该是在python/tvm/runtime/ndarray.py中定义的。这个文件中不仅仅有cpu,而且还有其他的设备类型,如cuda, opencl, gpu等等。所以如果我们要新增一种设备,应该需要在这个文件中新增新设备对应的接口。
而在create_executor中,如果传入了device,会断言它的device_type与根据target参数(我们传入的是llvm)构造出的设备的device_type一致;如果没有传入device,则直接根据target构造一个设备:
def create_executor(kind="debug", mod=None, device=None, target="llvm", params=None):
    # Abbreviated excerpt: only the device check is shown.
    ...
    # The supplied device must have the same device type as the one derived
    # from the target string.
    if device is not None:
        assert device.device_type == _nd.device(str(target), 0).device_type
def device(dev_type, dev_id=0):
    """Construct a Device from a device type and a device id.

    Parameters
    ----------
    dev_type : str or other
        When a string is given, only its first whitespace-separated word is
        used and it is translated through ``Device.STR2MASK``. Any other
        value is passed to ``Device`` unchanged.

    dev_id : int, optional
        The device id, 0 by default.

    Returns
    -------
    dev : Device
        The constructed device.

    Raises
    ------
    ValueError
        If `dev_type` is a string whose first word is not a key of
        ``Device.STR2MASK``.
    """
    if isinstance(dev_type, string_types):
        # Keep only the first word, so a string with extra words after the
        # device name still resolves.
        dev_type = dev_type.split()[0]
        if dev_type not in Device.STR2MASK:
            raise ValueError("Unknown device type %s" % dev_type)
        dev_type = Device.STR2MASK[dev_type]
    return Device(dev_type, dev_id)
这里Device定义在python/tvm/_ffi/runtime_ctypes.py,这个文件中也都是定义和设备相关的接口。dev_type参数传入的是llvm,所以这里Device实例的dev_type会是整型值1。
之后create_executor返回一个GraphExecutor实例。
回到测试程序中,在create_executor之后,调用intrp.evaluate(),也就是执行GraphExecutor的evaluate()方法。GraphExecutor没有定义evaluate(),所以这里将会调用到GraphExecutor父类_interpreter.Executor的evaluate()。在调用时没有传入任何参数,所以直接走了下面这个分支:
if not expr:
return self._make_executor()
这里self._make_executor是 GraphExecutor._make_executor
class GraphExecutor(_interpreter.Executor):
    """Wrapper around Executor interface.

    This executor is used for debug and testing purposes.

    Parameters
    ----------
    mod : :py:class:`~tvm.IRModule`
        The module to support the execution.

    device : :py:class:`Device`
        The runtime device to run the code on.

    target : :py:class:`Target`
        The target option to build the function.
    """

    def __init__(self, mod, device, target):
        assert mod is not None
        self.mod = mod
        self.device = device
        self.target = target

    def _make_executor(self, expr=None):
        """Build the module and return a callable that runs inference.

        Parameters
        ----------
        expr : optional
            When truthy, it replaces the module's "main" function before
            building.

        Returns
        -------
        _graph_wrapper : callable
            A function taking the model inputs and returning the outputs.

        Raises
        ------
        ValueError
            If the return type of "main" is dynamic.
        """
        # evaluate() was called without arguments in the walkthrough, so expr
        # is None on that path.
        if expr:
            # Install the given expression as the module's "main" function.
            self.mod["main"] = expr
        # Run the InferType pass (src/relay/transforms/type_infer.cc); the
        # next line reads the checked_type of "main" from its result.
        self.mod = InferType()(self.mod)
        ret_type = self.mod["main"].checked_type.ret_type
        if _ty.is_dynamic(ret_type):
            raise ValueError(
                "Graph Executor only supports static graphs, got output type", ret_type
            )
        # Compile the Relay IR module.
        mod = build(self.mod, target=self.target)
        # Wrap the built module, instantiated on self.device, in a GraphModule;
        # this object is what set_input/run/get_output are called on below.
        gmodule = _graph_rt.GraphModule(mod["default"](self.device))

        def _unflatten(flat_iter, cur_type):
            # Rebuild the nested structure described by cur_type by pulling
            # tensors from the flat output iterator in order.
            if isinstance(cur_type, _ty.TensorType):
                return next(flat_iter)
            if isinstance(cur_type, _ty.TupleType):
                fields = []
                for field_type in cur_type.fields:
                    field = _unflatten(flat_iter, field_type)
                    fields.append(field)
                return fields
            raise ValueError("Return type", ret_type, "contains unsupported type", cur_type)

        # Takes the user's model inputs, runs inference and returns the result.
        def _graph_wrapper(*args, **kwargs):
            # Convert positional/keyword inputs into the argument sequence
            # for "main" (presumably ordered by its parameters; see
            # _convert_args).
            args = self._convert_args(self.mod["main"], args, kwargs)
            # Create map of inputs.
            for i, arg in enumerate(args):
                gmodule.set_input(i, arg)
            # Run the module, and fetch the output.
            gmodule.run()
            flattened = []
            # Copy every output to CPU 0, then restore the structure of the
            # declared return type.
            for i in range(gmodule.get_num_outputs()):
                flattened.append(gmodule.get_output(i).copyto(_nd.cpu(0)))
            unflattened = _unflatten(iter(flattened), ret_type)
            return unflattened

        # Note: this returns the function itself, not an inference result.
        return _graph_wrapper
mod = build(self.mod, target=self.target)这个是调用了python/tvm/relay/build_module.py里面的build函数:
def build(
    ir_mod,
    target=None,
    target_host=None,
    executor=Executor("graph"),
    runtime=Runtime("cpp"),
    params=None,
    mod_name="default",
):
    # fmt: off
    # pylint: disable=line-too-long
    """Helper function that builds a Relay function to run on TVM graph executor.

    Parameters
    ----------
    ir_mod : :py:class:`~tvm.IRModule`
        The IR module to build. Using relay.Function is deprecated.
        Note: pass an IRModule here; a relay.Function is still accepted but
        triggers a DeprecationWarning.

    target : str, :any:`tvm.target.Target`, or dict of str(i.e. device/context name) to str/tvm.target.Target, optional
        For heterogeneous compilation, it is a dictionary indicating context to
        target mapping. For homogeneous compilation, it is a build target.

    target_host : str or :any:`tvm.target.Target`, optional
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.
        Note: in other words, `target` selects the device-side codegen and
        `target_host` selects the host-side codegen.

    executor : Optional[Executor]
        The executor configuration with which to build the model.
        Defaults to "graph" if no executor specified.

    runtime : Optional[Runtime]
        Runtime configuration to use when building the model.
        Defaults to "cpp" if no runtime specified.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.
        Note: presumably these are the model's constant weights.

    mod_name: Optional[str]
        The module name we will build

    Returns
    -------
    factory_module : tvm.relay.backend.executor_factory.ExecutorFactoryModule
        The runtime factory for the TVM graph executor.

    Raises
    ------
    ValueError
        If `ir_mod` is neither an IRModule nor a relay Function, or if
        `target_host` is truthy but neither a str nor a Target.
    """
    # pylint: enable=line-too-long
    # fmt: on
    # In the walkthrough's test case ir_mod is an IRModule.
    if not isinstance(ir_mod, (IRModule, _function.Function)):
        raise ValueError("Type of input parameter mod must be tvm.IRModule")

    # Deprecated path: wrap a bare Function into an IRModule, binding params
    # first when they are given.
    if isinstance(ir_mod, _function.Function):
        if params:
            ir_mod = bind_params_by_name(ir_mod, params)
        ir_mod = IRModule.from_expr(ir_mod)
        warnings.warn(
            "Please use input parameter mod (tvm.IRModule) "
            "instead of deprecated parameter mod (tvm.relay.function.Function)",
            DeprecationWarning,
        )

    if target_host is not None:
        warnings.warn(
            "target_host parameter is going to be deprecated. "
            "Please pass in tvm.target.Target(target, host=target_host) instead."
        )

    # Check that target and target_host are consistent with each other.
    target, target_host = Target.check_and_update_host_consist(
        target, target_host, target_is_dict_key=False
    )

    # Build the target mapping; it is used as a dict (target.values()) below.
    target = build_target_by_device_type_map(target)
    if isinstance(target_host, (str, Target)):
        target_host = Target(target_host)
    elif target_host:
        raise ValueError("target host must be the type of str, " + "tvm.target.Target, or None")

    # All of this logic is to raise deprecation warnings for various parameters
    # TODO(Mousius) Remove these after some time
    # An executor/runtime reconstructed from the deprecated options on the
    # target, when present, overrides the one passed in.
    deprecated_params_target = target_host or list(target.values())[0]
    deprecated_executor, deprecated_runtime = _reconstruct_from_deprecated_options(
        deprecated_params_target
    )
    if deprecated_executor:
        executor = deprecated_executor
    if deprecated_runtime:
        runtime = deprecated_runtime

    # If current dispatch context is fallback context (the default root context),
    # then load pre-tuned parameters from TopHub
    if isinstance(autotvm.DispatchContext.current, autotvm.FallbackContext):
        tophub_context = autotvm.tophub.context(list(target.values()))
    else:
        tophub_context = autotvm.utils.EmptyContext()

    with tophub_context:
        # Instantiate a BuildModule and call its build method to compile the model.
        bld_mod = BuildModule()
        graph_json, runtime_mod, params = bld_mod.build(
            mod=ir_mod,
            target=target,
            params=params,
            executor=executor,
            runtime=runtime,
            mod_name=mod_name,
        )
        func_metadata = bld_mod.get_function_metadata()
        devices = bld_mod.get_devices()
        lowered_ir_mods = bld_mod.get_irmodule()

        if str(executor) == "aot":
            executor_factory = _executor_factory.AOTExecutorFactoryModule(
                ir_mod,
                lowered_ir_mods,
                target,
                executor,
                runtime_mod,
                mod_name,
                params,
                func_metadata,
                devices,
            )
        elif str(executor) == "graph":
            # Package the compiled module together with its parameters.
            executor_factory = _executor_factory.GraphExecutorFactoryModule(
                ir_mod, target, executor, graph_json, runtime_mod, mod_name, params, func_metadata
            )
        else:
            # str() keeps the message buildable when executor is not a plain
            # string (it is compared via str(executor) above).
            assert False, "Executor " + str(executor) + " not supported"

        return executor_factory
这里调用了BuildModule.build对模型的tvm relay ir进行编译。先看下这个类的__init__函数
class BuildModule(object):
    """Build an IR module to run on TVM graph executor. This class is used
    to expose the `RelayBuildModule` APIs implemented in C++.
    """

    def __init__(self):
        # Module handle created by _build_module._BuildModule(), i.e. the
        # global function named relay.build_module._BuildModule.
        self.mod = _build_module._BuildModule()
        # Look up each function of the module by its string key and keep the
        # returned handle for later calls.
        self._get_graph_json = self.mod["get_graph_json"]
        self._get_module = self.mod["get_module"]
        self._build = self.mod["build"]
        self._optimize = self.mod["optimize"]
        self._set_params_func = self.mod["set_params"]
        self._get_params_func = self.mod["get_params"]
        self._get_function_metadata = self.mod["get_function_metadata"]
        self._get_devices = self.mod["get_devices"]
        self._get_irmodule = self.mod["get_irmodule"]
_build_module._BuildModule()调用的是relay.build_module._BuildModule()函数,这个是C++注册的:
// Factory for the Relay build module: allocates a RelayBuildModule and wraps
// it in a runtime::Module handle.
runtime::Module RelayBuildCreate() {
  auto exec = make_object<RelayBuildModule>();
  return runtime::Module(exec);
}

// Register the factory as the global function "relay.build_module._BuildModule"
// so that it can be looked up by name.
TVM_REGISTER_GLOBAL("relay.build_module._BuildModule").set_body([](TVMArgs args, TVMRetValue* rv) {
  *rv = RelayBuildCreate();
});
而接下来的self.mod["get_graph_json"]、mod["get_module"]等也都是C++中函数的句柄。中括号中的字符串是获取函数句柄的关键字,关键字和C++函数对应关系:
class RelayBuildModule : public runtime::ModuleNode {
public:
RelayBuildModule() = default;
PackedFunc GetFunction(const std::string& name, const ObjectPtr
这种字符串对应C++函数的机制就是TVM的PackedFunc机制。详细可以参考
TVM PackedFunc实现机制 | Don't Respond
前面说到python中调用BuildModule.build来编译模型,即调用的是C++中RelayBuildModule::build方法。
参考:
【从零开始学深度学习编译器】六,TVM的编译流程详解 - 知乎



