I want to have dynamic reduce sizes in a tensor expression. Code seems to compile and lower fine, but when I try to generate CUDA code I get the error:
Not all Vars are passed in api_args: 'a_lu' 'a_lb' 'b_lu' 'b_lb' does not appeared in api_args
No idea what I am doing wrong here. I continue to find the error messages in TVM beyond perplexing.
def dia():
batch = tvm.var("batch")
size = 100
#offsets = tvm.placeholder((4,), dtype="int32", name="offsets")
a_lu, a_lb, b_lu, b_lb = tvm.var("a_lu"), tvm.var("a_lb"), tvm.var("b_lu"), tvm.var("b_lb")
#a_lu, a_lb, b_lu, b_lb = 1, 1, 1, 1
result_lu = a_lu + b_lu
result_lb = a_lb + b_lb
self_width = a_lu + a_lb + 1
b_width = b_lu + b_lb + 1
out_width = result_lu + result_lb + 1
k = tvm.reduce_axis((0, self_width), name='k')
val = 0.0
A = tvm.placeholder((batch, size, self_width), name="A")
B = tvm.placeholder((batch, size, b_width), name="B")
B_pad = tvm.compute(
(batch, size, size),
lambda batch, k, j:
tvm.if_then_else(
tvm.any(k - j < b_lb, j - k > b_lu),
val, B[batch, k, k - j - b_lb]), name="B_pad"
)
C = tvm.compute(
(batch, size, out_width),
lambda batch, i, j: tvm.sum(
A[batch, i, k] * B_pad[batch, k, j],
axis=[k]), name='Out')
s = tvm.create_schedule(C.op)
...
s, args = dia()
tvm.lower(s, args, simple_mode=True)
Lowered code
produce Out {
// attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = batch
// attr [A.shared] storage_scope = "shared"
allocate A.shared[float32 * 256]
// attr [B_pad] storage_scope = "local"
allocate B_pad[float32 * 1]
// attr [B_pad.shared] storage_scope = "shared"
allocate B_pad.shared[float32 * 256]
// attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = (((((a_lu + b_lu) + a_lb) + b_lb)/16) + 1)
// attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 7
// attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 16
// attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
if (likely(((blockIdx.x*16) < (100 - threadIdx.x)))) {
if (likely(((blockIdx.y*16) < (((((a_lu + b_lu) + a_lb) + b_lb) - threadIdx.y) + 1)))) {
Out[(((blockIdx.y*16) + ((((blockIdx.z*100) + (blockIdx.x*16)) + threadIdx.x)*((((a_lu + b_lu) + a_lb) + b_lb) + 1))) + threadIdx.y)] = 0.000000f
}
}
for (k.outer, 0, (((a_lu + a_lb)/16) + 1)) {
produce A.shared {
if (likely(((blockIdx.x*16) < (100 - threadIdx.x)))) {
if (likely(((k.outer*16) < (((a_lu + a_lb) - threadIdx.y) + 1)))) {
A.shared[((threadIdx.x*16) + threadIdx.y)] = A[(((k.outer*16) + ((((blockIdx.z*100) + (blockIdx.x*16)) + threadIdx.x)*((a_lu + a_lb) + 1))) + threadIdx.y)]
}
}
}
produce B_pad {
if (likely(((k.outer*16) < (100 - threadIdx.x)))) {
if (likely(((blockIdx.y*16) < (100 - threadIdx.y)))) {
B_pad[0] = tvm_if_then_else((((k.outer*16) < (((b_lb + (blockIdx.y*16)) + threadIdx.y) - threadIdx.x)) || (b_lu < ((((blockIdx.y*16) + threadIdx.y) - threadIdx.x) - (k.outer*16)))), 0.000000f, B[((((((k.outer*16) + ((((blockIdx.z*100) + (k.outer*16)) + threadIdx.x)*((b_lu + b_lb) + 1))) + threadIdx.x) - threadIdx.y) - b_lb) - (blockIdx.y*16))])
}
}
}
produce B_pad.shared {
if (likely(((k.outer*16) < (100 - threadIdx.x)))) {
if (likely(((blockIdx.y*16) < (100 - threadIdx.y)))) {
B_pad.shared[((threadIdx.x*16) + threadIdx.y)] = B_pad[0]
}
}
}
for (k.inner, 0, 16) {
if (likely(((blockIdx.x*16) < (100 - threadIdx.x)))) {
if (likely(((blockIdx.y*16) < (((((a_lu + b_lu) + a_lb) + b_lb) - threadIdx.y) + 1)))) {
if (likely(((k.outer*16) < (((a_lu + a_lb) - k.inner) + 1)))) {
Out[(((blockIdx.y*16) + ((((blockIdx.z*100) + (blockIdx.x*16)) + threadIdx.x)*((((a_lu + b_lu) + a_lb) + b_lb) + 1))) + threadIdx.y)] = (Out[(((blockIdx.y*16) + ((((blockIdx.z*100) + (blockIdx.x*16)) + threadIdx.x)*((((a_lu + b_lu) + a_lb) + b_lb) + 1))) + threadIdx.y)] + (A.shared[((threadIdx.x*16) + k.inner)]*B_pad.shared[((k.inner*16) + threadIdx.y)]))
}
}
}
}
}
}
Error
TVMError: Traceback (most recent call last):
[bt] (3) /tvm/build/libtvm.so(TVMFuncCall+0x65) [0x7f7a1e3b9525]
[bt] (2) /tvm/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), void tvm::runtime::TypedPackedFunc<tvm::LoweredFunc (HalideIR::Internal::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)>::AssignTypedLambda<tvm::LoweredFunc (*)(HalideIR::Internal::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)>(tvm::LoweredFunc (*)(HalideIR::Internal::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0x117) [0x7f7a1dbe7a87]
[bt] (1) /tvm/build/libtvm.so(tvm::ir::MakeAPI(HalideIR::Internal::Stmt, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::Array<tvm::NodeRef, void>, int, bool)+0x33a6) [0x7f7a1de4aee6]
[bt] (0) /tvm/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x43) [0x7f7a1dba4493]
File "/content/gdrive/My Drive/tvm/src/pass/make_api.cc", line 188
TVMError: Not all Vars are passed in api_args: 'a_lu' 'a_lb' 'b_lu' 'b_lb' does not appeared in api_args