About TVM GPU problems

Setup

Based on the tvmai/ci-gpu:v0.64 Docker environment, with TVM built from the latest source code.

Problems

1st problem: Using the tutorial “from_pytorch.py”, I change the target to cuda and export the files *.so, *.json, and *.params. I then load the files and test the inference time, but when I check the GPU with nvidia-smi I cannot see any GPU usage, even though the inference time is fast, around 36 ms.
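
Note: nvidia-smi only shows a process while its kernels are actually running, so a short benchmark is easy to miss; keeping nvidia-smi open in another terminal (or running nvidia-smi -l 1) while the loop runs should show the utilization spike. A minimal in-script sanity check, assuming the same TVM Python API as in the scripts below, is to ask the runtime whether the CUDA device is reachable:

import tvm

ctx = tvm.gpu(0)
# If this prints False, the CUDA runtime cannot see the device, and the
# graph runtime below would fail to run at all rather than fall back to CPU.
print("CUDA device found:", ctx.exist)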

2nd problem: Setting the target to “cuda -libs=cudnn”, the inference time measured right after compilation is faster than with the plain “cuda” target. But when I export the files *.so, *.json, and *.params and load them to run again, the inference time is about 3x slower than it was at compile time, and about 2x slower than the loaded “cuda” build:

“cuda -libs=cudnn”, measured after compile: 22.7420 ms
Load the exported files with “cuda -libs=cudnn”: 75.9057 ms
Load the exported files with “cuda”: 36.0942 ms
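
A caveat on these numbers: the timing loops below include set_input and the device-to-host copy in asnumpy(), and the first iterations pay one-time setup costs (cuDNN in particular initializes lazily on first use). As a steadier comparison, here is a sketch using TVM's time_evaluator (the call is standard TVM runtime API; tvm_model and ctx refer to the objects created in the scripts below):

import numpy as np

# Time only the "run" call, averaged over `repeat` batches of `number`
# runs each; the first batch effectively serves as a warm-up.
ftimer = tvm_model.module.time_evaluator("run", ctx, number=10, repeat=3)
prof_res = np.array(ftimer().results) * 1000  # results are in seconds
print("Mean inference time: %.2f ms (std %.2f ms)"
      % (np.mean(prof_res), np.std(prof_res)))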

Code

First script: compile, measure, and export.

import torch
import torchvision
import time
import numpy as np
# An instance of your model.
model_name = 'resnet50'
model = getattr(torchvision.models, model_name)(pretrained=False)
model.eval()
# An example input you would normally provide to your model's forward() method.
input_shape = [16, 3, 224, 224]
example = torch.randn(input_shape)
# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example).eval()

#Convert PyTorch graph to Relay graph.
import tvm
from tvm import relay

input_name = 'input0'  # only one input, set it to this name
shape_list = [(input_name, input_shape)]

mod, params = relay.frontend.from_pytorch(traced_script_module, shape_list)

# Compile the graph for the cuda target, with llvm as the host target.
target = 'cuda'
#target = 'cuda -libs=cudnn'
target_host = 'llvm'
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(mod, target=target, target_host=target_host, params=params)

# Deploy the compiled model on the target device.
from tvm.contrib import graph_runtime
ctx = tvm.context(target, 0)
tvm_model = graph_runtime.create(graph, lib, ctx)
tvm_model.set_input(**params)

tvm_s_t = time.time()
for i in range(64):
    tvm_model.set_input(input_name, tvm.nd.array(example))
    # Execute
    tvm_model.run()
    # Get outputs (asnumpy() copies the result to host and syncs the GPU)
    output = tvm_model.get_output(0)
    output = output.asnumpy()
tvm_e_t = (time.time() - tvm_s_t) / 64

print("average tvm inference time (s):", tvm_e_t)

print("Output model files")
libpath = "./resnet50_cuda_bs16.so"
#libpath = "./resnet50_cuda_cudnn_bs16.so"
lib.export_library(libpath)

graph_json_path = "./resnet50_cuda_bs16.json"
#graph_json_path = "./resnet50_cuda_cudnn_bs16.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

param_path = "./resnet50_cuda_bs16.params"
#param_path = "./resnet50_cuda_cudnn_bs16.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
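
A quick way to confirm that the exported library really contains GPU code (relevant to the first problem; imported_modules and type_key are standard TVM Module API):

# After relay.build, the host module imports a device module whose
# type_key should be "cuda" for a GPU build.
print([m.type_key for m in lib.imported_modules])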
Second script: load the exported files and run.

import torch
import torchvision
import time
import numpy as np

# An example input you would normally provide to your model's forward() method.
input_shape = [16, 3, 224, 224]
example = torch.randn(input_shape)

import tvm

input_name = 'input0'  # only one input, set it to this name

test_json = 'resnet50_cuda_bs16.json'
test_lib = 'resnet50_cuda_bs16.so'
test_param = 'resnet50_cuda_bs16.params'
#test_json = 'resnet50_cuda_cudnn_bs16.json'
#test_lib = 'resnet50_cuda_cudnn_bs16.so'
#test_param = 'resnet50_cuda_cudnn_bs16.params'

loaded_json = open(test_json).read()
loaded_lib = tvm.runtime.load_module(test_lib)
loaded_params = bytearray(open(test_param, "rb").read())

# Pick the context matching the target the library was compiled for.
target = 'cuda'
# target = "cuda -libs=cudnn"

# Deploy the loaded model on the target device.
from tvm.contrib import graph_runtime
ctx = tvm.context(target, 0)
tvm_model = graph_runtime.create(loaded_json, loaded_lib, ctx)
tvm_model.load_params(loaded_params)

tvm_s_t = time.time()
for i in range(64):
    tvm_model.set_input(input_name, tvm.nd.array(example))
    # Execute
    tvm_model.run()
    # Get outputs (asnumpy() copies the result to host and syncs the GPU)
    output = tvm_model.get_output(0)
    output = output.asnumpy()
tvm_e_t = (time.time() - tvm_s_t) / 64

print("average tvm inference time (s):", tvm_e_t)