Hi there,
I’ll break the answer into two parts:
Running remotely
The runner you use for the remote version will be different. Here’s an if statement that sets up a configuration depending on whether or not you’re running locally.
Note the differences between the local and remote setups below.
# Connection details for the device the tuned kernels will run on.
device_info = {
    'device_key': args.device_key,
    'rpc_address': "0.0.0.0",
    'rpc_port': args.host_port,
    'target': args.target_string,
}

# Autotuning parameters shared by the local and remote paths.
tuning_option = {
    'tuner': 'xgb_knob',  # alternative tuner: 'ga'
    'n_trial': args.trials,
    'early_stopping': 1000,
    'log_filename': args.model_name + '.log',
}

if args.target_type == "local":
    # Build and benchmark candidate kernels on this machine.
    device_info['remote'] = False
    tuning_option['measure_option'] = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=1, repeat=1,
                                   min_repeat_ms=1000))
elif args.target_type == "remote":
    # Build locally (with the NDK toolchain when targeting Android),
    # then send candidates to the board through the RPC tracker.
    device_info['remote'] = True
    tuning_option['measure_option'] = autotvm.measure_option(
        builder=autotvm.LocalBuilder(
            build_func='ndk' if use_android else 'default'),
        runner=autotvm.RPCRunner(device_info['device_key'],
                                 host=device_info['rpc_address'],
                                 port=device_info['rpc_port'],
                                 number=5, timeout=10))
Getting the inference time cost
The output of the autotuning process is a log file containing the best configuration found for your computation graph in TVM (e.g. ResNet50.log).
To evaluate it and get the inference time, add something like the following to your script:
# Compile the model using the best schedules recorded during tuning,
# then measure end-to-end inference latency.
with autotvm.apply_history_best(tuning_option['log_filename']):
    print("Compile...")
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build_module.build(
            mod, target=target, params=params)

    if device_info['remote']:
        # Export the compiled library to a temp dir, push it to the
        # board over RPC, and load it there.
        tmp = tempdir()
        if use_android:
            from tvm.contrib import ndk
            filename = "net.so"
            lib.export_library(tmp.relpath(filename), ndk.create_shared)
        else:
            filename = "net.tar"
            lib.export_library(tmp.relpath(filename))

        print("Upload...")
        remote = autotvm.measure.request_remote(device_info['device_key'],
                                                device_info['rpc_address'],
                                                device_info['rpc_port'],
                                                timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)
        ctx = remote.context(str(target), 0)
        module = runtime.create(graph, rlib, ctx)
    else:
        # Everything runs on the local CPU.
        ctx = tvm.cpu()
        module = runtime.create(graph, lib, ctx)

    # Feed a random input tensor plus the compiled parameters.
    # NOTE(review): assumes data_shape/dtype/input_names were defined
    # earlier in the script — confirm against the full file.
    data_tvm = tvm.nd.array(np.random.uniform(size=data_shape).astype(dtype))
    module.set_input(input_names[0], data_tvm)
    module.set_input(**params)

    # Run 3 batches of 100 inferences each; report mean and std dev.
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))