Hi TVM community,
I am new to TVM. With the current master version, if we use `llvm -mcpu=skylake-avx512` or `llvm -mcpu=cascadelake` instead of plain `llvm` as the target device,
e.g.,
with tvm.target.create(device):
B = topi.nn.conv2d(A, W, stride, padding, dilation, layout='NHWC', out_dtype="int32")
s = topi.generic.schedule_conv2d_nhwc_pack([B])
a = tvm.nd.array(a_np, ctx)
w = tvm.nd.array(w_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
func = tvm.build(s, [A, W, B], device)
func(a, w, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
# for device in ['llvm -mcpu=skylake-avx512']:
for device in ['llvm']:
check_device(device)
class DefaultFallback(autotvm.FallbackContext):
def _query_inside(self, target, workload):
key = (target, workload)
if key in self.memory:
return self.memory[key]
cfg = FallbackConfigEntity()
n = 128
k = 128
X = tvm.placeholder((m, k), name='X', dtype="uint8")
W = tvm.placeholder((n, k), name='W', dtype="int8")
peak = 512/16*2*2*2
gops_per_mm = 2*n*m*k
print("Peak {} Gops/s \n".format(peak))
def verify(target="llvm -mcpu=skylake-avx512"):
if not tvm.module.enabled(target):
print("skip because %s is not enabled..." % target)
return
ctx = tvm.context(target, 0)
X = tvm.placeholder((m, k), name='X', dtype="uint8")
W = tvm.placeholder((n, k), name='W', dtype="int8")
pc = dot_16x1x16_int8_int8_int16()
ak = tvm.reduce_axis((0, k), name='k')
w = tvm.nd.array(w_np, ctx)
c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
func = tvm.build(s, [A, W, C], device,
name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
# print(tvm.lower(s, [A, W, C], simple_mode=True))
func(a, w, c)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-3)
# for device in ["llvm"]:
for device in ["llvm -mcpu=skylake-avx512"]:
with autotvm.tophub.context(device): # load tophub pre-tuned parameters
check_device(device)
@nottest
def test_conv2d_NCHWc():
# ResNet50 workloads
verify_group_conv2d_NCHWc_int8(1, 256, 32, 224, 64, 7, 2, 3)
if __name__ == "__main__":
# The test requires Skylake and newer Intel machines to generate the correct
it reports an error like the following:
TVMError: Check failed: is_one(e.region[i]->extent): Tensorize tensor_intrin: Input dimension mismatch with tensor intrin expected shape=[16, 4], given region=[range(min=((j.outer*16)/16), ext=(((((j.outer*16) + 15)/16) + 1) - j.outer)), range(min=(((((k.outer.outer*4) + k.outer.inner)*4)/4)*16), ext=((((((((k.outer.outer*16) + (k.outer.inner*4)) + 3)/4)*16) + 16) - (k.outer.inner*16)) - (k.outer.outer*64))), range(min=0, ext=4)]
The same issue is reported here:
If I replace `llvm` with `llvm -mcpu=skylake-avx512` at
https://github.com/dmlc/tvm/blob/master/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py#L67
the test fails with the following error:
File "/home/ubuntu/workplace/t1/tvm/src/op/tensorize.cc", line 226
TVMError: Check failed:...
status: help wanted
Is there any workaround to solve this issue?