I really can not reproduce this error. Currently, master branch should only occur this error:
num_filter, _, kernel_h, kernel_w = get_const_tuple(Filter.shape)
ValueError: too many values to unpack (expected 4)
I will pull a request soon.
My tune script:
import os
import numpy as np
import tvm
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
def get_network(name, batch_size):
"""Get the symbol definition and random weight of a network"""
input_shape = (batch_size, 3, 224, 224)
output_shape = (batch_size, 1000)
if "resnet" in name:
n_layer = int(name.split('-')[1])
mod, params = relay.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
elif "vgg" in name:
n_layer = int(name.split('-')[1])
mod, params = relay.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
elif name == 'mobilenet':
mod, params = relay.testing.mobilenet.get_workload(batch_size=batch_size)
elif name == 'squeezenet_v1.1':
mod, params = relay.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1', dtype=dtype)
elif name == 'inception_v3':
input_shape = (1, 3, 299, 299)
mod, params = relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
elif name == 'mxnet':
# an example for mxnet model
from mxnet.gluon.model_zoo.vision import get_model
block = get_model('resnet18_v1', pretrained=True)
mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype)
net = mod["main"]
net = relay.Function(net.params, relay.nn.softmax(net.body), None, net.type_params, net.attrs)
mod = relay.Module.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
return mod, params, input_shape, output_shape
#### DEVICE CONFIG ####
# Replace "aarch64-linux-gnu" with the correct target of your board.
# This target is used for cross compilation. You can query it by :code:`gcc -v` on your device.
target = tvm.target.create('llvm -device=arm_cpu -target=aarch64-linux-gnu')
# Also replace this with the device key in your tracker
device_key = 'rasp'
# Set this to True if you use android phone
use_android = False
#### TUNING OPTION ####
network = 'mobilenet'
log_file = "%s.%s.log" % (device_key, network)
dtype = 'float32'
tuning_option = {
'log_filename': log_file,
'tuner': 'xgb',
'n_trial': 20,
'early_stopping': 8,
'measure_option': autotvm.measure_option(
builder=autotvm.LocalBuilder(
build_func='ndk' if use_android else 'default'),
runner=autotvm.RPCRunner(
device_key, host='0.0.0.0', port=9198,
number=5,
timeout=10,
),
),
}
# You can skip the implementation of this function for this tutorial.
def tune_tasks(tasks,
measure_option,
tuner='xgb',
n_trial=1000,
early_stopping=None,
log_filename='tuning.log',
use_transfer_learning=True,
try_winograd=True,
try_spatial_pack_depthwise=True):
if try_winograd:
for i in range(len(tasks)):
try: # try winograd template
tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
tasks[i].target, tasks[i].target_host, 'winograd')
input_channel = tsk.workload[1][1]
if input_channel >= 64:
tasks[i] = tsk
except Exception:
pass
# if we want to use spatial pack for depthwise convolution
if try_spatial_pack_depthwise:
tuner = 'xgb_knob'
for i in range(len(tasks)):
if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
tasks[i].target, tasks[i].target_host,
'contrib_spatial_pack')
tasks[i] = tsk
# create tmp log file
tmp_log_file = log_filename + ".tmp"
if os.path.exists(tmp_log_file):
os.remove(tmp_log_file)
for i, tsk in enumerate(reversed(tasks)):
prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
# create tuner
if tuner == 'xgb' or tuner == 'xgb-rank':
tuner_obj = XGBTuner(tsk, loss_type='rank')
elif tuner == 'xgb_knob':
tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
elif tuner == 'ga':
tuner_obj = GATuner(tsk, pop_size=50)
elif tuner == 'random':
tuner_obj = RandomTuner(tsk)
elif tuner == 'gridsearch':
tuner_obj = GridSearchTuner(tsk)
else:
raise ValueError("Invalid tuner: " + tuner)
if use_transfer_learning:
if os.path.isfile(tmp_log_file):
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
# do tuning
n_trial = min(n_trial, len(tsk.config_space))
tuner_obj.tune(n_trial=n_trial,
early_stopping=early_stopping,
measure_option=measure_option,
callbacks=[
autotvm.callback.progress_bar(n_trial, prefix=prefix),
autotvm.callback.log_to_file(tmp_log_file)])
# pick best records to a cache file
autotvm.record.pick_best(tmp_log_file, log_filename)
os.remove(tmp_log_file)
def tune_and_evaluate(tuning_opt):
# extract workloads from relay program
print("Extract tasks...")
mod, params, input_shape, _ = get_network(network, batch_size=1)
tasks = autotvm.task.extract_from_program(mod["main"], target=target,
params=params,
ops=(relay.op.nn.conv2d,))
# run tuning tasks
print("Tuning...")
tune_tasks(tasks, **tuning_opt)
# compile kernels with history best records
with autotvm.apply_history_best(log_file):
print("Compile...")
with relay.build_config(opt_level=3):
graph, lib, params = relay.build_module.build(
mod, target=target, params=params)
# export library
tmp = tempdir()
if use_android:
from tvm.contrib import ndk
filename = "net.so"
lib.export_library(tmp.relpath(filename), ndk.create_shared)
else:
filename = "net.tar"
lib.export_library(tmp.relpath(filename))
# upload module to device
print("Upload...")
remote = autotvm.measure.request_remote(device_key, '0.0.0.0', 9198,
timeout=10000)
remote.upload(tmp.relpath(filename))
rlib = remote.load_module(filename)
# upload parameters to device
ctx = remote.context(str(target), 0)
module = runtime.create(graph, rlib, ctx)
data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
module.set_input('data', data_tvm)
module.set_input(**params)
# evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
(np.mean(prof_res), np.std(prof_res)))
# We do not run the tuning in our webpage server since it takes too long.
# Uncomment the following line to run it by yourself.
tune_and_evaluate(tuning_option)
there is no special modification.