I compiled resnet50_v2 using the latest TVM:
from mxnet.gluon.model_zoo.vision import get_model
model_name = 'resnet50_v2'
block = get_model(model_name, pretrained=True)
target = tvm.target.cuda()
target_host = 'llvm -target=aarch64-linux-gnu'
from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
set_cuda_target_arch('sm_62')
lib.export_library(path_lib, cc="aarch64-linux-gnu-g++")
But when I tried to run it on a Jetson TX2, I got CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES.
# relay compiled version:
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (3) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(TVMFuncCall+0x70) [0x7fa0d926a8]
[bt] (2) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(+0x859b8) [0x7fa0e039b8]
[bt] (1) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(+0x854cc) [0x7fa0e034cc]
[bt] (0) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(+0x10064) [0x7fa0d8e064]
File "/home/nvidia/tvm/src/runtime/cuda/cuda_module.cc", line 215
File "/home/nvidia/tvm/src/runtime/module_util.cc", line 73
TVMError: Check failed: ret == 0 (-1 vs. 0) : CUDALaunch Error: CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES
grid=(2,14,4), block=(28,1,16)
// func_name=fused_nn_conv2d_add_3_kernel0
// CUDA Source
// -----------
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//
.version 6.3
.target sm_62
.address_size 64
// .globl fused_nn_dense_add_kernel0
// _ZZ26fused_nn_dense_add_kernel0E8red_buf0 has been demoted
// _ZZ26fused_nn_dense_add_kernel0E7compute has been demoted
// _ZZ48fused_nn_conv2d_add_multiply_add_nn_relu_kernel0E15pad_temp_shared has been demoted
// _ZZ48fused_nn_conv2d_add_multiply_add_nn_relu_kernel0E18placeholder_shared has been demoted
// _ZZ37fused_nn_conv2d_add_nn_relu_5_kernel0E15pad_temp_shared has been demoted
NNVM compiled version:
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (3) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(TVMFuncCall+0x70) [0x7f8b45c6a8]
[bt] (2) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(+0x859b8) [0x7f8b4cd9b8]
[bt] (1) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(+0x854cc) [0x7f8b4cd4cc]
[bt] (0) /usr/local/lib/python3.6/dist-packages/tvm-0.6.dev0-py3.6-linux-aarch64.egg/tvm/libtvm_runtime.so(+0x10064) [0x7f8b458064]
File "/home/nvidia/tvm/src/runtime/cuda/cuda_module.cc", line 215
File "/home/nvidia/tvm/src/runtime/module_util.cc", line 73
TVMError: Check failed: ret == 0 (-1 vs. 0) : CUDALaunch Error: CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES
grid=(2,14,4), block=(28,1,16)
// func_name=fuse_conv2d_kernel0
// CUDA Source
// -----------
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//
.version 6.3
.target sm_62
.address_size 64
// .globl fuse_broadcast_add_kernel0
// _ZZ38fuse_conv2d_broadcast_add_relu_kernel0E15pad_temp_shared has been demoted
// _ZZ38fuse_conv2d_broadcast_add_relu_kernel0E13input1_shared has been demoted
I'm using CUDA 10.0.