Wrong output shape and format

Hi all,

I'm currently working on a project to convert an MXNet model into a TVM one.
The following script works and I see an improvement in speed.
Nevertheless, I cannot use the output for one of my images because the output shapes are different.

In MXNet, the output is a list of 9 NumPy arrays with the following shapes:
(1, 4, 34, 23),
(1, 8, 34, 23),
(1, 20, 34, 23),
(1, 4, 68, 45),
(1, 8, 68, 45),
(1, 20, 68, 45),
(1, 4, 135, 90),
(1, 8, 135, 90),
(1, 20, 135, 90)

In TVM, after tuning, the output is still a list of 9 elements, but the shapes of some of the elements are unexpected:
(1, 4, 34, 23)
(1, 1, 34, 23, 8)
(1, 4, 34, 23, 5)
(1, 4, 68, 45)
(1, 1, 68, 45, 8)
(1, 4, 68, 45, 5)
(1, 4, 135, 90)
(1, 1, 135, 90, 8)
(1, 4, 135, 90, 5)

I tried many manipulations of the output arrays, but I cannot find one that gives output matching MXNet's.

Any ideas?

import os

import numpy as np

import nnvm.testing
import nnvm.compiler
import tvm
import mxnet as mx
from tvm import autotvm
import tvm.relay as relay
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
import tvm.contrib.graph_runtime as runtime


def get_network(name, batch_size):
    """Load the MXNet checkpoint and convert it to an NNVM graph.

    The checkpoint prefix ("mnet.10"), the epoch (0) and the input shape
    are hard-coded.

    Parameters
    ----------
    name : str
        Accepted for interface compatibility; currently not used.
    batch_size : int
        Accepted for interface compatibility; currently not used. The
        returned input shape always has a leading dimension of 1.

    Returns
    -------
    tuple
        ``(nnvm_sym, nnvm_params, input_shape)`` where ``nnvm_sym`` and
        ``nnvm_params`` come from ``nnvm.frontend.from_mxnet`` and
        ``input_shape`` is ``(1, 3, 1080, 720)``.
    """
    prefix, epoch = "mnet.10", 0
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    nnvm_sym, nnvm_params = nnvm.frontend.from_mxnet(sym, arg_params, aux_params)
    input_shape = (1, 3, 1080, 720)
    return nnvm_sym, nnvm_params, input_shape

# Compilation target string passed to AutoTVM task creation and to the
# NNVM build.
target = "llvm -mcpu=skylake"

batch_size = 1
dtype = "float32"
model_name = "mnet10"
# Tuning records are written to, and later read back from, this file.
log_file = "%s.log" % model_name

# Set the TVM_NUM_THREADS environment variable for this process.
# Requires `os` to be imported at the top of the file.
num_threads = 3
os.environ["TVM_NUM_THREADS"] = str(num_threads)

# Keyword arguments forwarded verbatim to tune_kernels(tasks, **tuning_option);
# the keys must therefore match that function's parameter names.
tuning_option = {
    'log_filename': log_file,
    'tuner': 'random',
    'early_stopping': None,
    'n_trial': 10,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=10, repeat=1,
                                   min_repeat_ms=1000),
    ),
}

def tune_kernels(tasks,
                 measure_option,
                 n_trial=10,
                 tuner='ga',
                 early_stopping=None,
                 log_filename='tuning.log'):
    """Tune every extracted convolution task and log the results.

    Each incoming task is re-created as its x86 NCHWc counterpart, tuned
    with the selected tuner, and its records are appended to
    ``log_filename``.

    Parameters
    ----------
    tasks : sequence
        Tasks as returned by ``autotvm.task.extract_from_graph``. Each must
        expose ``workload`` (whose first element is the operator name) and
        ``args``.
    measure_option : dict
        Measurement configuration, forwarded to the tuner's ``tune``.
    n_trial : int
        Number of tuning trials per task.
    tuner : str
        One of ``'xgb'``, ``'xgb-rank'``, ``'ga'``, ``'random'`` or
        ``'gridsearch'``.
    early_stopping : int or None
        Forwarded to the tuner's ``tune``.
    log_filename : str
        File that receives the tuning records.

    Raises
    ------
    ValueError
        If a task's operator is neither ``conv2d`` nor
        ``depthwise_conv2d_nchw``, or if ``tuner`` is not a known name.
    """
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # converting conv2d tasks to conv2d_NCHWc tasks
        op_name = tsk.workload[0]
        if op_name == 'conv2d':
            func_create = 'topi_x86_conv2d_NCHWc'
        elif op_name == 'depthwise_conv2d_nchw':
            func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        else:
            raise ValueError("Tuning {} is not supported on x86".format(op_name))

        # `target` is the module-level compilation target.
        task = autotvm.task.create(func_create, args=tsk.args,
                                   target=target, template_key='direct')
        # Keep the original workload on the re-created task.
        task.workload = tsk.workload

        # Build the requested tuner; every class used here is imported at
        # the top of the file.
        if tuner in ('xgb', 'xgb-rank'):
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning, honouring the caller-supplied n_trial
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)])

# Load the model and extract the conv2d tuning tasks from its graph.
net, params, data_shape = get_network(model_name, batch_size)
tasks = autotvm.task.extract_from_graph(net, target=target,
                                        shape={'data': data_shape}, dtype=dtype,
                                        symbols=(nnvm.sym.conv2d,))

print("Tuning...")
# tuning_option's keys are passed as keyword arguments to tune_kernels.
tune_kernels(tasks, **tuning_option)
# compile kernels with history best records
with autotvm.apply_history_best(log_file):
    print("Compile...")
    # NOTE(review): the 5-D output shapes reported for this script, e.g.
    # (1, 1, 34, 23, 8) instead of (1, 8, 34, 23), look like channel-blocked
    # NCHW[x]c tensors, presumably left by a layout-altering pass at
    # opt_level=3 -- TODO confirm. If so,
    # out.transpose(0, 1, 4, 2, 3).reshape(n, c, h, w) should recover NCHW.
    with nnvm.compiler.build_config(opt_level=3):
        # `params` is rebound here to the parameters returned by the build.
        graph, lib, params = nnvm.compiler.build(
            net, target=target, shape={'data': data_shape}, params=params, dtype=dtype)
    # upload parameters to device
    ctx = tvm.cpu()
    # Random input data; this script only measures run time and does not
    # read the outputs back.
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
    module = runtime.create(graph, lib, ctx)
    module.set_input('data', data_tvm)
    module.set_input(**params)
# evaluate: time the module's "run" function
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
        (np.mean(prof_res), np.std(prof_res)))
# Export the three deployment artifacts: compiled library, graph JSON and
# serialized parameters.
lib.export_library("new_deploy_tuned_lib3.so")
print('lib export succeefully')
with open("new_deploy_tuned_graph3.json", "w") as fo:
    fo.write(graph.json())

with open("new_deploy_tuned_param3.params", "wb") as fo:
    fo.write(nnvm.compiler.save_param_dict(params))
`

Any ideas?

I don't know if my post was clear enough. Do you need more information?