Hi:
I am trying to optimize a TensorFlow network with AutoTVM.
The tuning part finishes normally and generates the corresponding .json,
.so, and .params files, although five workloads did not find a valid config.
But when the runtime module calls the run
method, it fails and produces the following logs.
Any suggestions on how to make it work?
Traceback (most recent call last):
File "run_model.py", line 112, in <module>
run()
File "run_model.py", line 36, in run
results = tf_tvm_py_runner.run(feed_dict)
File "/root/Codes/tf_tvm/tf_tvm/tf_tvm_runner.py", line 150, in run
sub_graph_runner.run(tensor_dict)
File "/root/Codes/tf_tvm/tf_tvm/tf_tvm_runner.py", line 99, in run
self._module.run()
File "/root/Codes/incubator-tvm/python/tvm/contrib/graph_runtime.py", line 176, in run
self._run()
File "/root/Codes/incubator-tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 213, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (3) /root/Codes/incubator-tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f1418811c91]
[bt] (2) /root/Codes/incubator-tvm/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::runtime::detail::PackFuncVoidAddr_<4, tvm::runtime::CUDAWrappedFunc>(tvm::runtime::CUDAWrappedFunc, std::vector<tvm::runtime::detail::ArgConvertCode, std::allocator<tvm::runtime::detail::ArgConvertCode> > const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0xbc) [0x7f14188a847c]
[bt] (1) /root/Codes/incubator-tvm/build/libtvm.so(tvm::runtime::CUDAWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*, void**) const+0x665) [0x7f14188a7f55]
[bt] (0) /root/Codes/incubator-tvm/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7f1417f61c72]
File "/root/Codes/incubator-tvm/src/runtime/cuda/cuda_module.cc", line 214
File "/root/Codes/incubator-tvm/src/runtime/library_module.cc", line 89
TVMError: Check failed: ret == 0 (-1 vs. 0) : CUDALaunch Error: CUDA_ERROR_INVALID_VALUE
grid=(4,215296,1), block=(64,1,1)
// func_name=fused_nn_dense_2_kernel0
// CUDA Source
// -----------
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//
.version 6.3
.target sm_75
.address_size 64
// .globl fused_transpose_layout_transform_kernel0
// _ZZ67fused_nn_contrib_conv2d_winograd_without_weight_transform_3_kernel1E18placeholder_shared has been demoted
// _ZZ67fused_nn_contrib_conv2d_winograd_without_weight_transform_3_kernel1E16data_pack_shared has been demoted
// _ZZ29fused_nn_conv2d_add_6_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_6_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_9_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_9_kernel0E18placeholder_shared has been demoted
// _ZZ67fused_nn_contrib_conv2d_winograd_without_weight_transform_1_kernel1E18placeholder_shared has been demoted
// _ZZ67fused_nn_contrib_conv2d_winograd_without_weight_transform_1_kernel1E16data_pack_shared has been demoted
// _ZZ30fused_nn_conv2d_add_17_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_17_kernel0E18placeholder_shared has been demoted
// _ZZ71fused_nn_contrib_conv2d_winograd_without_weight_transform_add_3_kernel1E18placeholder_shared has been demoted
// _ZZ71fused_nn_contrib_conv2d_winograd_without_weight_transform_add_3_kernel1E16data_pack_shared has been demoted
// _ZZ71fused_nn_contrib_conv2d_winograd_without_weight_transform_add_2_kernel1E18placeholder_shared has been demoted
// _ZZ71fused_nn_contrib_conv2d_winograd_without_weight_transform_add_2_kernel1E16data_pack_shared has been demoted
// _ZZ30fused_nn_conv2d_add_14_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_14_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_1_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_1_kernel0E18placeholder_shared has been demoted
// _ZZ25fused_nn_conv2d_4_kernel0E15pad_temp_shared has been demoted
// _ZZ25fused_nn_conv2d_4_kernel0E18placeholder_shared has been demoted
// _ZZ25fused_nn_conv2d_2_kernel0E15pad_temp_shared has been demoted
// _ZZ25fused_nn_conv2d_2_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_5_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_5_kernel0E18placeholder_shared has been demoted
// _ZZ24fused_nn_dense_2_kernel0E8red_buf0 has been demoted
// _ZZ22fused_nn_dense_kernel0E8red_buf0 has been demoted
// _ZZ25fused_nn_conv2d_3_kernel0E15pad_temp_shared has been demoted
// _ZZ25fused_nn_conv2d_3_kernel0E18placeholder_shared has been demoted
// _ZZ30fused_nn_conv2d_add_15_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_15_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_8_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_8_kernel0E18placeholder_shared has been demoted
// _ZZ30fused_nn_conv2d_add_11_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_11_kernel0E18placeholder_shared has been demoted
// _ZZ30fused_nn_conv2d_add_13_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_13_kernel0E18placeholder_shared has been demoted
// _ZZ24fused_nn_dense_1_kernel0E8red_buf0 has been demoted
// _ZZ29fused_nn_conv2d_add_2_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_2_kernel0E18placeholder_shared has been demoted
// _ZZ71fused_nn_contrib_conv2d_winograd_without_weight_transform_add_1_kernel1E18placeholder_shared has been demoted
// _ZZ71fused_nn_contrib_conv2d_winograd_without_weight_transform_add_1_kernel1E16data_pack_shared has been demoted
// _ZZ67fused_nn_contrib_conv2d_winograd_without_weight_transform_2_kernel1E18placeholder_shared has been demoted
// _ZZ67fused_nn_contrib_conv2d_winograd_without_weight_transform_2_kernel1E16data_pack_shared has been demoted
// _ZZ30fused_nn_conv2d_add_12_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_12_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_7_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_7_kernel0E18placeholder_shared has been demoted
// _ZZ65fused_nn_contrib_conv2d_winograd_without_weight_transform_kernel1E18placeholder_shared has been demoted
// _ZZ65fused_nn_contrib_conv2d_winograd_without_weight_transform_kernel1E16data_pack_shared has been demoted
// _ZZ69fused_nn_contrib_conv2d_winograd_without_weight_transform_add_kernel1E18placeholder_shared has been demoted
// _ZZ69fused_nn_contrib_conv2d_winograd_without_weight_transform_add_kernel1E16data_pack_shared has been demoted
// _ZZ30fused_nn_conv2d_add_16_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_16_kernel0E18placeholder_shared has been demoted
// _ZZ30fused_nn_conv2d_add_10_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_10_kernel0E18placeholder_shared has been demoted
// _ZZ27fused_nn_conv2d_add_kernel0E15pad_temp_shared has been demoted
// _ZZ27fused_nn_conv2d_add_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_3_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_3_kernel0E18placeholder_shared has been demoted
// _ZZ25fused_nn_conv2d_1_kernel0E15pad_temp_shared has been demoted
// _ZZ25fused_nn_conv2d_1_kernel0E18placeholder_shared has been demoted
// _ZZ29fused_nn_conv2d_add_4_kernel0E15pad_temp_shared has been demoted
// _ZZ29fused_nn_conv2d_add_4_kernel0E18placeholder_shared has been demoted
// _ZZ30fused_nn_conv2d_add_18_kernel0E15pad_temp_shared has been demoted
// _ZZ30fused_nn_conv2d_add_18_kernel0E18placeholder_shared has been demoted
.visible .entry fused_transpose_layout_transform_kernel0(
.param .u64 fused_transpose_layout_transform_kernel0_param_0,
.param .u64 fused_transpose_layout_transform_kernel0_param_1
)
{
.reg .pred %p<3>;
.reg .f32 %f<2>;
.reg .b32 %r<11>;
.reg .b64 %rd<8>;
ld.param.u64 %rd3, [fused_transpose_layout_transform_kernel0_param_0];
ld.param.u64 %rd4, [fused_transpose_layout_transform_kernel0_param_1];
mov.u32 %r7, %ctaid.x;
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd3;
mov.u32 %r8, %tid.x;
mad.lo.s32 %r10, %r7, 1024, %r8;
mov.u32 %r9, -53;
BB0_1:
setp.gt.s32 %p1, %r10, 13778943;
@%p1 bra BB0_3;
mul.wide.s32 %rd5, %r10, 4;
add.s64 %rd6, %rd1, %rd5;
ld.global.nc.f32 %f1, [%rd6];
add.s64 %rd7, %rd2, %rd5;
st.global.f32 [%rd7], %f1;
BB0_3:
add.s32 %r10, %r10, 262144;
add.s32 %r9, %r9, 1;
setp.ne.s32 %p2, %r9, 0;
@%p2 bra BB0_1;
ret;
}