2D Convolution Optimization example doesn't work

I tried the 2D Convolution Optimization tutorial with different values for in_channels and out_channels, but the code doesn’t seem to work.
I would like to run the convolution with a different number of input channels. Can anyone give me some direction?

@thierry

# 2D convolution layer dimensions taken from ResNet-18 architecture
# (9th convolutional layer)
batch_size = 1
height = 14
width = 14
in_channels = 16 # -----> it was 256
out_channels = 16  # -----> it was 256
kernel_h = 3
kernel_w = 3
pad_h = 1
pad_w = 1
stride_h = 1
stride_w = 1
...

data_np = np.random.randint(-128, 128, size=(batch_size, in_channels, height, width)).astype(data.dtype)
kernel_np = np.random.randint(-128, 128, size=(out_channels, in_channels, kernel_h, kernel_w)).astype(kernel.dtype)
# data_np.shape = (1,16,14,14)
# kernel_np.shape = (16,16,3,3)
...

Here is the output:

Traceback (most recent call last):
  File "convolution_opt_simple.py", line 651, in <module>
    print(vta.lower(s, [data, kernel, res], simple_mode=True))
  File "~/tvm/vta/python/vta/build_module.py", line 83, in lower
    return tvm.lower(*args, **kwargs)
  File "~/tvm/python/tvm/build_module.py", line 352, in lower
    stmt = f(stmt)
  File "~/tvm/python/tvm/_ffi/_ctypes/function.py", line 185, in __call__
    ctypes.byref(ret_val), ctypes.byref(ret_tcode)))
  File "~/tvm/python/tvm/_ffi/base.py", line 72, in check_call
    raise TVMError(py_str(_LIB.TVMGetLastError()))
tvm._ffi.base.TVMError: TVMCall CFunc Error:
Traceback (most recent call last):
  File "~/tvm/python/tvm/_ffi/_ctypes/function.py", line 55, in cfun
    rv = local_pyfunc(*pyargs)
  File "~/tvm/vta/python/vta/ir_pass.py", line 561, in inject_dma_intrin
    return tvm.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy)
  File "~/tvm/python/tvm/_ffi/_ctypes/function.py", line 185, in __call__
    ctypes.byref(ret_val), ctypes.byref(ret_tcode)))
  File "~/tvm/python/tvm/_ffi/base.py", line 72, in check_call
    raise TVMError(py_str(_LIB.TVMGetLastError()))
tvm._ffi.base.TVMError: [17:08:33] ~/tvm/src/pass/inject_copy_intrin.cc:30: Check failed: MatchCopyPattern(op->body, &ret) Cannot match copy pattern of for (i0, 0, 8) {
  for (i2, 0, 3) {
    for (i3, 0, 3) {
      for (i4, 0, 16) {
        for (i5, 0, 16) {
          if (likely((((cthread*8) + i0) < 1))) {
            kernel_buf[(((((i0*2304) + (i2*768)) + (i3*256)) + (i4*16)) + i5)] = kernel[((((((cthread*18432) + (i0*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)]
          }
        }
      }
    }
  }
}


Stack trace returned 10 entries:
[bt] (0) ~/tvm/build/libtvm.so(dmlc::StackTrace[abi:cxx11](unsigned long)+0x9d) [0x7f97ff485cfd]
[bt] (1) ~/tvm/build/libtvm.so(tvm::ir::CopyIntrinInjector::Mutate_(HalideIR::Internal::AttrStmt const*, HalideIR::Internal::Stmt const&)+0x4fb) [0x7f97ff676acb]
[bt] (2) ~/tvm/build/libtvm.so(+0xaa8ca4) [0x7f97ff635ca4]
[bt] (3) ~/tvm/build/libtvm.so(std::_Function_handler<HalideIR::Internal::Stmt (tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*), tvm::IRFunctor<HalideIR::Internal::Stmt (tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)>::set_dispatch<HalideIR::Internal::AttrStmt>(std::function<HalideIR::Internal::Stmt (HalideIR::Internal::AttrStmt const*, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)>)::{lambda(tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)#1}>::_M_invoke(std::_Any_data const&, tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*&&)+0x53) [0x7f97ff63f843]
[bt] (4) ~/tvm/build/libtvm.so(tvm::IRFunctor<HalideIR::Internal::Stmt (tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)>::operator()(tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*) const+0x11b) [0x7f97ff4d755b]
[bt] (5) ~/tvm/build/libtvm.so(tvm::ir::IRMutator::Mutate(HalideIR::Internal::Stmt)+0x5b) [0x7f97ff5d9cbb]
[bt] (6) ~/tvm/build/libtvm.so(tvm::ir::IRMutator::Mutate_(HalideIR::Internal::ProducerConsumer const*, HalideIR::Internal::Stmt const&)+0x49) [0x7f97ff639719]
[bt] (7) ~/tvm/build/libtvm.so(+0xaa8fb4) [0x7f97ff635fb4]
[bt] (8) ~/tvm/build/libtvm.so(std::_Function_handler<HalideIR::Internal::Stmt (tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*), tvm::IRFunctor<HalideIR::Internal::Stmt (tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)>::set_dispatch<HalideIR::Internal::ProducerConsumer>(std::function<HalideIR::Internal::Stmt (HalideIR::Internal::ProducerConsumer const*, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)>)::{lambda(tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)#1}>::_M_invoke(std::_Any_data const&, tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*&&)+0x53) [0x7f97ff63fbc3]
[bt] (9) ~/tvm/build/libtvm.so(tvm::IRFunctor<HalideIR::Internal::Stmt (tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*)>::operator()(tvm::NodeRef const&, HalideIR::Internal::Stmt const&, tvm::ir::IRMutator*) const+0x11b) [0x7f97ff4d755b]