When I used VMExecutor to run a CNN model, it threw the following error:
RuntimeError: Check failed: VerifyMemory(func): Direct host side access to device memory is detected. Did you forget to bind?
PrimFunc([placeholder, transform_weight]) attrs={"global_symbol": "fused_nn_contrib_conv2d_winograd_weight_transform_6", "tir.noalias": (bool)1, "target": cuda} {
...
This is because contrib_conv2d_winograd_weight_transform doesn’t have a CUDA schedule. It is expected to work fine, because this op is generated in AlterOpLayout and will be evaluated later on the CPU during compilation.
To reproduce, apply the patch below to https://github.com/apache/incubator-tvm/blob/master/tests/python/relay/benchmarking/benchmark_vm.py:
diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py
index a6e05bee5..215a90974 100644
--- a/tests/python/relay/benchmarking/benchmark_vm.py
+++ b/tests/python/relay/benchmarking/benchmark_vm.py
@@ -60,10 +60,13 @@ def benchmark_execution(mod,
def get_vm_output(mod, data, params, target, ctx, dtype='float32',
number=2, repeat=20):
with tvm.transform.PassContext(opt_level=3):
- exe = vm.compile(mod, target, params=params)
- rly_vm = vm_rt.VirtualMachine(exe)
- rly_vm.init(ctx)
- result = rly_vm.run(data)
+ #exe = vm.compile(mod, target, params=params)
+ #rly_vm = vm_rt.VirtualMachine(exe)
+ #rly_vm.init(ctx)
+ #result = rly_vm.run(data)
+
+ interp = relay.create_executor('vm', mod=mod, target=target)
+ interp.evaluate()(**params)
if measure:
print("Evaluate vm inference cost of {} on {}".format(model,
@@ -79,8 +82,8 @@ def benchmark_execution(mod,
# random input
data = np.random.uniform(size=data_shape).astype(dtype)
- target = "llvm"
- ctx = tvm.cpu(0)
+ target = "cuda"
+ ctx = tvm.gpu(0)
tvm_out = get_graph_runtime_output(mod, tvm.nd.array(data.astype(dtype)),
params, target, ctx, dtype)
My TVM version: 7a9346a019d1e