Hi,
When I comment out the print statement `print(dict_params)` in the code below, the inference time increases greatly. This behavior is unexpected, and I would really appreciate any help with this issue.
The workflow I am following is:
- Define a relay.nn.conv2d layer
- Turn it into a module
- Measure execution time using module.time_evaluator
```python
# Code starts here
import os
import sys
import numpy as np
import tvm
import topi
import logging
from tvm import autotvm
from tvm import relay
from tvm.relay import testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
import tvm.contrib.graph_runtime as runtime

# Details about the target (CPU/GPU)
target = "llvm -mcpu=core-avx2"
target_host = "llvm"
batch_size = 1
dtype = "float32"

# Set number of threads
num_threads = 16
os.environ["TVM_NUM_THREADS"] = str(num_threads)

# Set the input name of the graph
input_name = "data"

# Define convolution properties
data_shape = (batch_size, 3, 224, 224)
kernel_shape = (64, 3, 7, 7)
kernel_size = (kernel_shape[2], kernel_shape[3])
strides = (2, 2)
padding = (3, 3, 3, 3)
dilation = (1, 1)

data = relay.var("data", shape=data_shape, dtype=dtype)
kernel = relay.var("kernel", shape=kernel_shape, dtype=dtype)
out = relay.nn.conv2d(data, kernel, strides=strides, padding=padding,
                      dilation=dilation, channels=64, kernel_size=kernel_size,
                      data_layout='NCHW', out_dtype=dtype)
# out = relay.nn.relu(out)
mod = relay.Module.from_expr(out)
print(mod)

ctx = tvm.cpu()
kernel_weights = tvm.nd.array(np.ones(kernel_shape, dtype=dtype), ctx)
dict_params = {'weights': kernel_weights}

graph, lib, params = relay.build_module.build(mod, params=dict_params,
                                              target=target, target_host=target_host)
# graph, lib, params = relay.build_module.build(mod, target=target, target_host=target_host)
print(dict_params)
print(ctx)

input_name = "data"
data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
module = runtime.create(graph, lib, ctx)
module.set_input(input_name, data_tvm)
module.set_input(**params)

# Evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=4, repeat=100)
prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
print(prof_res)
print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
```
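One detail that may be relevant: `dict_params` uses the key `'weights'`, while the relay variable is named `"kernel"`. As a sanity check, the snippet below (a minimal sketch of my own, meant to run right after `relay.build_module.build`, not part of the timed run) prints what goes into the build and what comes back, to see whether the constant weights were actually bound into the graph:

```python
# Sanity-check sketch: compare the parameter dict passed to build
# with the one it returns.
print("keys passed in:", list(dict_params.keys()))
print("keys returned by build:", list(params.keys()))
for name, arr in params.items():
    # Each value is a tvm.nd.NDArray; shape/dtype identify the tensor.
    print(name, arr.shape, arr.dtype)
```

If the weights had been folded into the graph at build time, I would expect the returned `params` to be empty, which would also change what `module.set_input(**params)` does before the timed run.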