During compilation, I got warnings like this:
WARNING:autotvm:Cannot find config for target=llvm, workload=('conv2d', (1, 512, 96, 96, 'float32'), (512, 512, 1, 1, 'float32'), (2, 2), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
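If I read this correctly, it means autotvm has no tuned schedule for that conv2d workload, so TVM falls back to a default one. I assume the intended fix is to tune the model first, along the lines of the autotvm tutorial. This is a sketch I have not run yet; the log file name and trial budget are placeholders I made up, and mod, params and target come from my script below:

from tvm import autotvm
from tvm.autotvm.tuner import XGBTuner

log_file = 'tune.log'  # placeholder path
tasks = autotvm.task.extract_from_program(mod['main'], target=target, params=params)
for task in tasks:
    tuner = XGBTuner(task)
    tuner.tune(
        n_trial=min(1000, len(task.config_space)),  # placeholder budget
        measure_option=autotvm.measure_option(
            builder=autotvm.LocalBuilder(),
            runner=autotvm.LocalRunner(number=10)),
        callbacks=[autotvm.callback.log_to_file(log_file)])

# rebuild with the tuned schedules applied
with autotvm.apply_history_best(log_file):
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(mod, target=target, params=params)

Is that the right flow here?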
Also, inference is slower than onnxruntime. My code is as follows:
import time

import cv2
import numpy as np
import onnx
import tvm
import tvm.relay as relay

input_shape = (1, 3, 768, 768)
shape_dict = {'0': input_shape}
impth = 'segm_pic.png'
mdpth = './tmp/model_final_ema.onnx'
opt_level = 3
target = 'cuda'
ctx = tvm.gpu()

# load and preprocess the input image
im = cv2.imread(impth)
im = cv2.resize(im, input_shape[2:][::-1])  # cv2.resize takes (width, height)
im = (im - np.array([123, 117, 104])) / np.array([58.4, 57.1, 57.4])
im = im.transpose((2, 0, 1))[np.newaxis, :].astype('float32')

# import the ONNX model into relay and create the graph executor
model = onnx.load(mdpth)
mod, params = relay.frontend.from_onnx(model, shape_dict, dtype='float32', opset=11)
with relay.build_config(opt_level=opt_level):
    intrp = relay.build_module.create_executor('graph', mod, ctx, target)

# time 100 runs with tvm
in_im = tvm.nd.array(im)
t1 = time.time()
for i in range(100):
    out = intrp.evaluate()(in_im, **params)[0].asnumpy()
t2 = time.time()

# time 100 runs with onnxruntime on the same input
import onnxruntime.backend as backend
model = onnx.load(mdpth)
sess = backend.prepare(model, device='GPU')
t3 = time.time()
for i in range(100):
    out_rt = backend.run(sess, im, device='GPU')[0]
t4 = time.time()

print(t2 - t1)  # tvm
print(t4 - t3)  # onnxruntime
The results show that TVM is actually slower than onnxruntime. Is this expected, or what is the correct way to use TVM here?
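Also, I realize my timing loop calls intrp.evaluate() on every iteration and reads time.time() without syncing the GPU, so the two numbers may not be strictly comparable. This is the measurement I would try instead, using graph_runtime and its time_evaluator, which synchronizes the device before reading the clock. It's a sketch, assuming the standard relay.build flow; '0' is the input name from shape_dict above:

from tvm.contrib import graph_runtime

with relay.build_config(opt_level=opt_level):
    graph, lib, params = relay.build(mod, target=target, params=params)

m = graph_runtime.create(graph, lib, ctx)
m.set_input('0', tvm.nd.array(im))
m.set_input(**params)

# time_evaluator syncs the device, so only inference is measured
ftimer = m.module.time_evaluator('run', ctx, number=100)
print(ftimer().mean)  # mean seconds per run

Would that be the recommended way to benchmark?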