When trying to compile a ResNet-like model for a Mali GPU, I encounter the following error:
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 3, 112, 112, 'float32'), (64, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 64, 112, 112, 'float32'), (64, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 64, 112, 112, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 64, 112, 112, 'float32'), (64, 64, 1, 1, 'float32'), (2, 2), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 64, 56, 56, 'float32'), (128, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 128, 56, 56, 'float32'), (128, 128, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 128, 28, 28, 'float32'), (256, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 256, 28, 28, 'float32'), (256, 256, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 256, 14, 14, 'float32'), (512, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('conv2d', (1, 512, 14, 14, 'float32'), (512, 512, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=opencl -device=mali, workload=('dense', (1, 25088, 'float32'), (512, 25088, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.
[03:05:50] /tvm/src/pass/loop_partition.cc:541: Cannot prove: ((((((((((((blockIdx.x*8) + threadIdx.x) % 16)/4)*4) + (threadIdx.x % 4)) + 1) - (((blockIdx.x*8) + threadIdx.x) % 16)) - 1) - 1) + 1) >= 0), when generating the post doubt loop
[03:05:50] /tvm/src/relay/ir/doc.h:51: text node: '{
"root": 1,
"nodes": [
{
"type_key": ""
},
{
"type_key": "StrMap",
"keys": [
"relay.attrs.Conv2DWinogradAttrs"
],
"data": [2]
},
{
"type_key": "Array",
"data": [3]
},
{
"type_key": "relay.attrs.Conv2DWinogradAttrs",
"attrs": {
"channels": "13",
"data_layout": "NCHW",
"dilation": "10",
"groups": "1",
"kernel_layout": "OIHW",
"kernel_size": "14",
"out_dtype": "",
"out_layout": "",
"padding": "7",
"strides": "4",
"tile_size": "2"
}
},
{
"type_key": "Array",
"data": [5, 6]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "Array",
"data": [8, 9]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "Array",
"data": [11, 12]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "512"
}
},
{
"type_key": "Array",
"data": [15, 16]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "3"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "3"
}
}
],
"b64ndarrays": [],
"attrs": {"tvm_version": "0.6.dev"}
}' should not has tab or newline.
Traceback (most recent call last):
File "../scripts/tvm_compile_android.py", line 63, in <module>
graph, lib, params = relay.build(mod, target, target_host, params=params)
File "/tvm/python/tvm/relay/build_module.py", line 207, in build
graph_json, mod, params = bld_mod.build(func, target, target_host, params)
File "/tvm/python/tvm/relay/build_module.py", line 108, in build
self._build(func, target, target_host)
File "tvm/_ffi/_cython/./function.pxi", line 310, in tvm._ffi._cy3.core.FunctionBase.__call__
File "tvm/_ffi/_cython/./function.pxi", line 245, in tvm._ffi._cy3.core.FuncCall
File "tvm/_ffi/_cython/./function.pxi", line 234, in tvm._ffi._cy3.core.FuncCall3
File "tvm/_ffi/_cython/./base.pxi", line 171, in tvm._ffi._cy3.core.CALL
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (8) /tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr(tvm::relay::Expr const&)+0x566) [0x7fb56a48e996]
[bt] (7) /tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr_(tvm::relay::CallNode const*)+0xb18) [0x7fb56a495d88]
[bt] (6) /tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr(tvm::relay::Expr const&)+0x566) [0x7fb56a48e996]
[bt] (5) /tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr_(tvm::relay::CallNode const*)+0xb18) [0x7fb56a495d88]
[bt] (4) /tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr(tvm::relay::Expr const&)+0x566) [0x7fb56a48e996]
[bt] (3) /tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr_(tvm::relay::CallNode const*)+0x6a9) [0x7fb56a495919]
[bt] (2) /tvm/build/libtvm.so(+0xa677bc) [0x7fb56a4a87bc]
[bt] (1) /tvm/build/libtvm.so(tvm::relay::CompileEngineImpl::LowerInternal(tvm::relay::CCacheKey const&)+0x872) [0x7fb56a4b3772]
[bt] (0) /tvm/build/libtvm.so(+0xc0b88b) [0x7fb56a64c88b]
File "/tvm/python/tvm/relay/backend/_backend.py", line 51, in lower
f = _build.lower(sch, inputs, name=func_name)
File "/tvm/python/tvm/build_module.py", line 416, in lower
return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func)
File "tvm/_ffi/_cython/./function.pxi", line 310, in tvm._ffi._cy3.core.FunctionBase.__call__
File "tvm/_ffi/_cython/./function.pxi", line 255, in tvm._ffi._cy3.core.FuncCall
File "tvm/_ffi/_cython/./base.pxi", line 171, in tvm._ffi._cy3.core.CALL
[bt] (3) /tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7fb56a651391]
[bt] (2) /tvm/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), void tvm::runtime::TypedPackedFunc<tvm::LoweredFunc (tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)>::AssignTypedLambda<tvm::LoweredFunc (*)(tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)>(tvm::LoweredFunc (*)(tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0x120) [0x7fb569ec1f50]
[bt] (1) /tvm/build/libtvm.so(tvm::ir::MakeAPI(tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)+0x3b97) [0x7fb56a0f2dd7]
[bt] (0) /tvm/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7fb569e7c542]
File "/tvm/src/pass/make_api.cc", line 187
File "tvm/_ffi/_cython/./function.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
File "/tvm/python/tvm/relay/backend/_backend.py", line 59, in lower
raise RuntimeError(msg)
File "/tvm/python/tvm/relay/backend/_backend.py", line 51, in lower
f = _build.lower(sch, inputs, name=func_name)
File "/tvm/python/tvm/build_module.py", line 416, in lower
return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func)
File "tvm/_ffi/_cython/./function.pxi", line 310, in tvm._ffi._cy3.core.FunctionBase.__call__
File "tvm/_ffi/_cython/./function.pxi", line 255, in tvm._ffi._cy3.core.FuncCall
File "tvm/_ffi/_cython/./base.pxi", line 171, in tvm._ffi._cy3.core.CALL
[bt] (3) /tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7fb56a651391]
[bt] (2) /tvm/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), void tvm::runtime::TypedPackedFunc<tvm::LoweredFunc (tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)>::AssignTypedLambda<tvm::LoweredFunc (*)(tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)>(tvm::LoweredFunc (*)(tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0x120) [0x7fb569ec1f50]
[bt] (1) /tvm/build/libtvm.so(tvm::ir::MakeAPI(tvm::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)+0x3b97) [0x7fb56a0f2dd7]
[bt] (0) /tvm/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7fb569e7c542]
File "/tvm/src/pass/make_api.cc", line 187
TVMError: Not all Vars are passed in api_args: 'threadIdx.x' does not appear in api_args
During handling of the above exception, another exception occurred:
TVMError: Not all Vars are passed in api_args: 'threadIdx.x' does not appear in api_args
Error during compile function
-----------------------------
v0.0.4
fn (%p0: Tensor[(1, 512, 7, 7), float32], %p1: Tensor[(4, 4, 128, 512, 4), float32], %p2: Tensor[(512, 1, 1), float32], %p3: Tensor[(1, 512, 7, 7), float32], %p4: Tensor[(512, 1, 1), float32], %p5: Tensor[(512, 1, 1), float32], Primitive=1) -> Tensor[(1, 512, 7, 7), float32] {
%0 = nn.contrib_conv2d_winograd_without_weight_transform(%p0, %p1, meta[relay.attrs.Conv2DWinogradAttrs][0]) /* ty=Tensor[(1, 512, 7, 7), float32] */;
%1 = add(%0, %p2) /* ty=Tensor[(1, 512, 7, 7), float32] */;
%2 = add(%1, %p3) /* ty=Tensor[(1, 512, 7, 7), float32] */;
%3 = multiply(%2, %p4) /* ty=Tensor[(1, 512, 7, 7), float32] */;
add(%3, %p5) /* ty=Tensor[(1, 512, 7, 7), float32] */
}
METADATA:
{
"root": 1,
"nodes": [
{
"type_key": ""
},
{
"type_key": "StrMap",
"keys": [
"relay.attrs.Conv2DWinogradAttrs"
],
"data": [2]
},
{
"type_key": "Array",
"data": [3]
},
{
"type_key": "relay.attrs.Conv2DWinogradAttrs",
"attrs": {
"channels": "13",
"data_layout": "NCHW",
"dilation": "10",
"groups": "1",
"kernel_layout": "OIHW",
"kernel_size": "14",
"out_dtype": "",
"out_layout": "",
"padding": "7",
"strides": "4",
"tile_size": "2"
}
},
{
"type_key": "Array",
"data": [5, 6]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "Array",
"data": [8, 9]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "Array",
"data": [11, 12]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "1"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "512"
}
},
{
"type_key": "Array",
"data": [15, 16]
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "3"
}
},
{
"type_key": "IntImm",
"attrs": {
"dtype": "int32",
"value": "3"
}
}
],
"b64ndarrays": [],
"attrs": {"tvm_version": "0.6.dev"}
}
Any idea how to solve this?