If the target is “llvm”, the terminal output looks like this:
ConfigSpace (len=100, space_map=
0 tile_y: Split(policy=all, product=512, num_outputs=2) len=10
1 tile_x: Split(policy=all, product=512, num_outputs=2) len=10
)
Get devices for measurement successfully!
No: 1 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.07700777053833008, timestamp=1540880974.8627179) [(‘tile_y’, [128, 4]), (‘tile_x’, [64, 8])],None,32
No: 2 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.0590059757232666, timestamp=1540880975.5287843) [(‘tile_y’, [8, 64]), (‘tile_x’, [16, 32])],None,56
No: 3 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.05600571632385254, timestamp=1540880975.6207936) [(‘tile_y’, [512, 1]), (‘tile_x’, [2, 256])],None,80
No: 4 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.06600689888000488, timestamp=1540880976.4048722) [(‘tile_y’, [512, 1]), (‘tile_x’, [1, 512])],None,90
No: 5 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.05500531196594238, timestamp=1540880976.9999316) [(‘tile_y’, [8, 64]), (‘tile_x’, [128, 4])],None,26
No: 6 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.07400727272033691, timestamp=1540880977.0279346) [(‘tile_y’, [128, 4]), (‘tile_x’, [8, 64])],None,62
No: 7 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.05000495910644531, timestamp=1540880977.206952) [(‘tile_y’, [256, 2]), (‘tile_x’, [1, 512])],None,91
No: 8 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.048004865646362305, timestamp=1540880977.223954) [(‘tile_y’, [1, 512]), (‘tile_x’, [16, 32])],None,59
No: 9 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.055005550384521484, timestamp=1540880987.2189534) [(‘tile_y’, [64, 8]), (‘tile_x’, [128, 4])],None,23
No: 10 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(FileNotFoundError(2, ‘系统找不到指定的文件。’, None, 2, None),), error_no=2, all_cost=0.06100630760192871, timestamp=1540880987.2719588) [(‘tile_y’, [64, 8]), (‘tile_x’, [64, 8])],None,33
Finish loading 120 records
Cannot find config for target=llvm, workload=(‘matmul’, 512, 512, 512, ‘float32’). A fallback configuration is used, which may bring great performance regression.
Press any key to continue . . .
It seems that it didn’t work.
My code is from the tutorial “Writing tunable template and Using auto-tuner”:
import logging
import sys
import numpy as np
import tvm
from tvm import autotvm
def matmul_v0(N, L, M, dtype):
    """Build a matrix-multiply compute with a fixed, hand-written schedule.

    Declares placeholders ``A`` of shape ``(N, L)`` and ``B`` of shape
    ``(L, M)``, defines ``C[i, j] = sum_k A[i, k] * B[k, j]``, and tiles both
    output axes with a hard-coded split factor of 8.

    Args:
        N: Number of rows of ``A`` and ``C``.
        L: Shared (reduction) dimension of ``A`` and ``B``.
        M: Number of columns of ``B`` and ``C``.
        dtype: Element type passed to both placeholders.

    Returns:
        A tuple ``(s, [A, B, C])`` with the schedule and its tensors.
    """
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    # Rebind k to the reduce axis owned by the scheduled stage.
    k = s[C].op.reduce_axis[0]

    # Fixed tile size of 8 on both output axes.
    yo, yi = s[C].split(y, 8)
    xo, xi = s[C].split(x, 8)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
@autotvm.template  # 1. use a decorator
def matmul_v1(N, L, M, dtype):
    """Tunable matrix multiply whose tile sizes are chosen from fixed knobs.

    Same computation as a plain ``(N, L) x (L, M)`` matmul, but the split
    factors for the two output axes are read from the autotvm config. Each
    factor is a knob with the candidate values ``[1, 2, 4, 8, 16]``.

    Args:
        N: Number of rows of ``A`` and ``C``.
        L: Shared (reduction) dimension of ``A`` and ``B``.
        M: Number of columns of ``B`` and ``C``.
        dtype: Element type passed to both placeholders.

    Returns:
        A tuple ``(s, [A, B, C])`` with the schedule and its tensors.
    """
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    # Rebind k to the reduce axis owned by the scheduled stage.
    k = s[C].op.reduce_axis[0]

    # 2. get the config object
    cfg = autotvm.get_config()

    # 3. define search space
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])

    # 4. schedule according to config
    yo, yi = s[C].split(y, cfg['tile_y'].val)
    xo, xi = s[C].split(x, cfg['tile_x'].val)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
@autotvm.template
def matmul(N, L, M, dtype):
    """Tunable matrix multiply whose tiling is described with split entities.

    Declares the ``(N, L) x (L, M)`` matmul and registers two split knobs,
    ``tile_y`` and ``tile_x``, each producing two outputs. The chosen splits
    are then applied to the output axes through the config object.

    Args:
        N: Number of rows of ``A`` and ``C``.
        L: Shared (reduction) dimension of ``A`` and ``B``.
        M: Number of columns of ``B`` and ``C``.
        dtype: Element type passed to both placeholders.

    Returns:
        A tuple ``(s, [A, B, C])`` with the schedule and its tensors.
    """
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    # Rebind k to the reduce axis owned by the scheduled stage.
    k = s[C].op.reduce_axis[0]

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    ##### define space end #####

    # schedule according to config
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
# Script entry point: create the tuning task, run a short random search,
# then build with the best logged config and check the result against NumPy.
# The pasted original read `if name == 'main':` (underscores lost), which
# would raise NameError; the standard guard is restored here.
if __name__ == '__main__':
    N, L, M = 512, 512, 512
    task = autotvm.task.create(matmul, args=(N, L, M, 'float32'), target='llvm')
    print(task.config_space)

    # Send autotvm's debug-level log records to stdout.
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

    # Build locally; run measurements through an RPC runner at localhost:9190.
    # NOTE(review): "test" is presumably the device key registered with an
    # RPC tracker - confirm that a tracker and server are actually running.
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.RPCRunner("test", host='localhost', port=9190, number=5, timeout=4),
    )

    # begin tuning, log records to file `matmul.log`
    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(n_trial=10,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file('matmul.log')])

    # apply history best from log file
    with autotvm.apply_history_best('matmul.log'):
        with tvm.target.create("llvm"):
            s, arg_bufs = matmul(N, L, M, 'float32')
            func = tvm.build(s, arg_bufs)

    # check correctness
    a_np = np.random.uniform(size=(N, L)).astype(np.float32)
    b_np = np.random.uniform(size=(L, M)).astype(np.float32)
    c_np = a_np.dot(b_np)

    c_tvm = tvm.nd.empty(c_np.shape)
    func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)

    tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)