[Relay][Concatenate] Cannot concatenate outputs of conv2d with different numbers of channels

Hi, everyone,
The following code defines a relay program that performs two convolution operations and concatenates their results.

import tvm
import numpy as np
import tvm.relay as relay

a = relay.var('a', shape=(1, 5, 32, 32))
p1 = relay.var('p1', shape=(2, 5, 3, 3))
p2 = relay.var('p2', shape=(3, 5, 3, 3))
c1 = relay.nn.conv2d(a, p1, channels=2, kernel_size=(3, 3), padding=(1, 1))   # (1, 2, 32, 32)
c2 = relay.nn.conv2d(a, p2, channels=3, kernel_size=(3, 3), padding=(1, 1))   # (1, 3, 32, 32)
c = relay.concatenate([c1, c2], axis=1)  # (1, 5, 32, 32)
func = relay.Function([a, p1, p2], c)
relay_module = relay.Module.from_expr(func)
params = {
    'p1': tvm.nd.array(np.random.rand(2, 5, 3, 3).astype(np.float32)),
    'p2': tvm.nd.array(np.random.rand(3, 5, 3, 3).astype(np.float32))
}
print(func.astext())

with relay.build_config(opt_level=3):
    graph, tvm_module, params = relay.build(relay_module, target='llvm', params=params)

The text format of the program is:

v0.0.3
fn (%a: Tensor[(1, 5, 32, 32), float32], %p1: Tensor[(2, 5, 3, 3), float32], %p2: Tensor[(3, 5, 3, 3), float32]) {
  %0 = nn.conv2d(%a, %p1, padding=[1, 1], channels=2, kernel_size=[3, 3]);
  %1 = nn.conv2d(%a, %p2, padding=[1, 1], channels=3, kernel_size=[3, 3]);
  %2 = (%0, %1);
  concatenate(%2, axis=1)
}

When I build the relay module, the following error occurs:

Traceback (most recent call last):
  File "/home/yaoyao/nfs/repos/tvm_workplace/exp_error/cat/main.py", line 43, in <module>
    case1()
  File "/home/yaoyao/nfs/repos/tvm_workplace/exp_error/cat/main.py", line 22, in case1
    graph, tvm_module, params = relay.build(relay_module, target='llvm', params=params)
  File "/home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/relay/build_module.py", line 207, in build
    graph_json, mod, params = bld_mod.build(func, target, target_host, params)
  File "/home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/relay/build_module.py", line 108, in build
    self._build(func, target, target_host)
  File "tvm/_ffi/_cython/./function.pxi", line 310, in tvm._ffi._cy3.core.FunctionBase.__call__
  File "tvm/_ffi/_cython/./function.pxi", line 245, in tvm._ffi._cy3.core.FuncCall
  File "tvm/_ffi/_cython/./function.pxi", line 234, in tvm._ffi._cy3.core.FuncCall3
  File "tvm/_ffi/_cython/./base.pxi", line 171, in tvm._ffi._cy3.core.CALL
tvm._ffi.base.TVMError: Traceback (most recent call last):
  [bt] (8) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::backend::RelayBuildModule::Optimize(tvm::relay::Module, tvm::Map<tvm::Integer, tvm::Target, void, void> const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::runtime::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, tvm::runtime::NDArray> > > const&)+0x671) [0x7f55927ebbf1]
  [bt] (7) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::transform::Pass::operator()(tvm::relay::Module const&) const+0x5c) [0x7f55927e491c]
  [bt] (6) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::transform::SequentialNode::operator()(tvm::relay::Module const&, tvm::relay::transform::PassContext const&) const+0x3b1) [0x7f5592a9d971]
  [bt] (5) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::transform::FunctionPassNode::operator()(tvm::relay::Module const&, tvm::relay::transform::PassContext const&) const+0x7d0) [0x7f5592a9c610]
  [bt] (4) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::ModuleNode::Add(tvm::relay::GlobalVar const&, tvm::relay::Function const&, bool)+0x826) [0x7f55928a3a26]
  [bt] (3) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::InferType(tvm::relay::Function const&, tvm::relay::Module const&, tvm::relay::GlobalVar const&)+0x39a) [0x7f5592ad297a]
  [bt] (2) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::TypeInferencer::Infer(tvm::relay::Expr)+0x71) [0x7f5592ad1b31]
  [bt] (1) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(tvm::relay::ErrorReporter::RenderErrors(tvm::relay::Module const&, bool)+0x181e) [0x7f559287418e]
  [bt] (0) /home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x43) [0x7f559245bf23]
  File "/home/yaoyao/repos/tvm/src/relay/ir/error.cc", line 133
TVMError: 
Error(s) have occurred. The program has been annotated with them:

In `main`: 
v0.0.3
fn (%a: Tensor[(1, 5, 32, 32), float32]) -> Tensor[(1, 5, 32, 32), float32] {
  %0 = layout_transform(%a, src_layout="NCHW", dst_layout="NCHW1c");
  %1 = layout_transform(meta[relay.Constant][0], src_layout="OIHW", dst_layout="OIHW1i2o");
  %2 = nn.contrib_conv2d_NCHWc(%0, %1, meta[relay.attrs.Conv2DAttrs][0]);
  %3 = layout_transform(meta[relay.Constant][1], src_layout="OIHW", dst_layout="OIHW1i3o");
  %4 = nn.contrib_conv2d_NCHWc(%0, %3, meta[relay.attrs.Conv2DAttrs][1]);
  %5 = layout_transform(%4, src_layout="NCHW3c", dst_layout="NCHW2c");
  %6 = (%2, %5);
  %7 = concatenate(%6, axis=1);
  layout_transform(%7, src_layout="NCHW2c", dst_layout="NCHW") in particular dimension 1 conflicts 4 does not match 5; unable to unify: `Tensor[(1, 4, 32, 32), float32]` and `Tensor[(1, 5, 32, 32), float32]`; 
}
// meta data omitted. you can use show_meta_data=True to include meta data

Thanks a lot if you can help!
(I found that when both convolution layers have the same number of channels, the error does not occur.)

Setting opt_level=3 applies conv2d_NCHWc instead of conv2d. This causes issues when concatenate is involved and conv2d uses the default schedule (without autotuning): as the annotated program above shows, each conv2d picks its own channel-block factor, and the layout_transform inserted to reconcile them changes the channel count. You can either 1) set opt_level < 3, or 2) follow the tune_relay_x86.py tutorial to tune the conv2d kernels. The graph tuner will automatically select schedules whose layouts fit the concatenate.
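
For completeness, option 1 in code (a minimal sketch; relay_module and params are the ones from the snippet above, and opt_level=2 is just one choice below 3):

    with relay.build_config(opt_level=2):
        graph, tvm_module, params = relay.build(relay_module, target='llvm', params=params)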

Hi @kevinthesun,

Is this the same situation as I described in the following post:

@kevinthesun Thanks for your reply!
As you suggested, I disabled the “AlterOpLayout” pass and everything works now.

    a = relay.var('a', shape=(1, 5, 32, 32))
    p1 = relay.var('p1', shape=(2, 5, 3, 3))
    p2 = relay.var('p2', shape=(3, 5, 3, 3))
    c1 = relay.nn.conv2d(a, p1, channels=2, kernel_size=(3, 3), padding=(1, 1))
    c2 = relay.nn.conv2d(a, p2, channels=3, kernel_size=(3, 3), padding=(1, 1))
    c = relay.concatenate([c1, c2], axis=1)
    func = relay.Function([a, p1, p2], c)
    relay_module = relay.Module.from_expr(func)
    params = {
        'p1': tvm.nd.array(np.random.rand(2, 5, 3, 3).astype(np.float32)),
        'p2': tvm.nd.array(np.random.rand(3, 5, 3, 3).astype(np.float32))
    }
    print(func.astext())
    with relay.build_config(opt_level=3, disabled_pass=set(['AlterOpLayout'])):
        graph, tvm_module, params = relay.build(relay_module, target='llvm', params=params)

For complicated models such as SSD, I suggest applying autotuning.
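
Roughly, the kernel-tuning part of that tutorial looks like the sketch below ('tuning.log', the trial count, and the measure options are placeholders; relay_module and params come from the snippet above):

    from tvm import autotvm
    from tvm.autotvm.tuner import XGBTuner

    target = 'llvm'
    # extract conv2d workloads from the relay program as tuning tasks
    tasks = autotvm.task.extract_from_program(relay_module["main"], target=target,
                                              params=params, ops=(relay.op.nn.conv2d,))
    for task in tasks:
        tuner = XGBTuner(task, loss_type='rank')
        tuner.tune(n_trial=min(1000, len(task.config_space)),
                   measure_option=autotvm.measure_option(
                       builder=autotvm.LocalBuilder(),
                       runner=autotvm.LocalRunner(number=10)),
                   callbacks=[autotvm.callback.log_to_file('tuning.log')])

The graph-tuning step then reads these kernel records and picks layout-compatible schedules across the whole graph.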

@kevinthesun I replaced the get_network function in the x86 autotuning tutorial with the following function:

def get_network(name, batch_size):
    a = relay.var('a', shape=(1, 5, 32, 32))
    p1 = relay.var('p1', shape=(2, 5, 3, 3))
    p2 = relay.var('p2', shape=(3, 5, 3, 3))
    c1 = relay.nn.conv2d(a, p1, channels=2, kernel_size=(3, 3), padding=(1, 1))
    c2 = relay.nn.conv2d(a, p2, channels=3, kernel_size=(3, 3), padding=(1, 1))
    c = relay.concatenate([c1, c2], axis=1)
    func = relay.Function([a, p1, p2], c)
    relay_module = relay.Module.from_expr(func)
    params = {
        'p1': tvm.nd.array(np.random.rand(2, 5, 3, 3).astype(np.float32)),
        'p2': tvm.nd.array(np.random.rand(3, 5, 3, 3).astype(np.float32))
    }
    return relay_module, params, (1, 5, 32, 32), (1, 5, 32, 32)

Then I ran the tutorial code, and during graph tuning the following error occurred:

Extract tasks...
Tuning...
[Task  1/ 2]  Current/Best:    6.22/  17.07 GFLOPS | Progress: (48/48) | 149.86 s Done.
[Task  2/ 2]  Current/Best:    6.53/  17.20 GFLOPS | Progress: (48/48) | 152.86 s Done.
Cannot find config for target=llvm -device=tracing, workload=('conv2d', (1, 5, 32, 32, 'float32'), (2, 5, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -device=tracing, workload=('conv2d', (1, 5, 32, 32, 'float32'), (3, 5, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
2019-07-27 12:57:48,213 INFO Start to benchmark layout transformation...
2019-07-27 12:57:48,213 INFO Benchmarking layout transformation successful.
2019-07-27 12:57:48,213 INFO Start to run dynamic programming algorithm...
2019-07-27 12:57:48,213 INFO Start forward pass...
2019-07-27 12:57:48,213 INFO Finished forward pass.
2019-07-27 12:57:48,213 INFO Start backward pass...
Traceback (most recent call last):
  File "/home/yaoyao/nfs/repos/tvm_workplace/tutorials/finished/auto_tune/x86_autotune.py", line 179, in <module>
    tune_and_evaluate(tuning_option)
  File "/home/yaoyao/nfs/repos/tvm_workplace/tutorials/finished/auto_tune/x86_autotune.py", line 153, in tune_and_evaluate
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)
  File "/home/yaoyao/nfs/repos/tvm_workplace/tutorials/finished/auto_tune/x86_autotune.py", line 140, in tune_graph
    executor.run()
  File "/home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/autotvm/graph_tuner/dynamic_programming_tuner.py", line 188, in run
    self._backward()
  File "/home/yaoyao/anaconda3/envs/python37/lib/python3.7/site-packages/tvm-0.6.dev0-py3.7-linux-x86_64.egg/tvm/autotvm/graph_tuner/dynamic_programming_tuner.py", line 93, in _backward
    num_states = states_list[0][3].size
IndexError: list index out of range

Is there any problem with the relay program?

Change the input name at https://github.com/dmlc/tvm/blob/master/tutorials/autotvm/tune_relay_x86.py#L91. This PR is also needed: https://github.com/dmlc/tvm/pull/3649
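
To spell that out (a sketch assuming the tutorial's tune_graph structure around the linked line; graph, data_shape, and log_file are the tutorial's own variables): the graph tuner keys its input-shape dict on the relay program's input variable name, which is "data" in the tutorial but 'a' in the get_network above.

    from tvm.autotvm.graph_tuner import DPTuner

    # the relay program above names its input 'a', not the tutorial's default "data"
    input_name = 'a'
    executor = DPTuner(graph, {input_name: data_shape}, log_file,
                       [relay.nn.conv2d], target='llvm')
    executor.benchmark_layout_transform(min_exec_num=2000)
    executor.run()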