[Tensorize] Tensorize failed after reorder

Hi all,
The case below reproduces this issue. We explored it a little and found that the CHECK still uses stale axis information; when these CHECKs are disabled, tensorize gives the expected result. We are raising this issue in the hope of finding a better solution for this case.

TensorEngine/src/op/tensorize.cc:238: Check failed: is_one(e.region[i]->extent) Tensorize tensor_intrin: Input dimension mismatch with tensor intrin expected shape=[16], given region=[range(min=0, ext=16), range(min=(j + 0), ext=1), range(min=(k + 0), ext=1)]

Thanks,

import tvm
import numpy as np

    def intrin_vadd(n):
        # Declare a 1-D vector-add tensor intrinsic: z[i] = x[i] + y[i] over n elements.
        x = tvm.placeholder((n,), name='vx')
        y = tvm.placeholder((n,), name='vy')
        z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
        def intrin_func(ins, outs):
            # Lower the intrinsic body to a packed call to an external "vadd" kernel.
            xx, yy = ins
            zz = outs[0]
            return tvm.call_packed("vadd", xx, yy, zz)
        # offset_factor=16 is part of the reported setup; it constrains buffer offsets.
        with tvm.build_config(offset_factor=16):
            return tvm.decl_tensor_intrin(z.op, intrin_func)


    def test_tensorize_vadd():
        # Reproduction: tensorize the axis that was moved innermost by reorder().
        m = 16
        n = 16
        l = 16
        x = tvm.placeholder((m,n,l), name='x')
        y = tvm.placeholder((m,n,l), name='y')
        z = tvm.compute(x.shape, lambda i,j,k: x[i,j,k] + y[i,j,k], name='z')

        def check(factor):
            s = tvm.create_schedule(z.op)
            xa, xb, xc = s[z].op.axis
            # Move xa (originally outermost) to the innermost position.
            s[z].reorder(xb,xc,xa)
            vadd = intrin_vadd(factor)
            # Tensorizing xa after the reorder triggers the reported CHECK failure
            # (input region extents are compared against stale axis info).
            s[z].tensorize(xa, vadd)
            s = s.normalize()
            print(tvm.lower(s, [x, y, z], simple_mode=True))

        check(16)

    test_tensorize_vadd()

hi,
I have a question about the solution code in https://github.com/dmlc/tvm/issues/1625

def intrin_vadd(n):
    """Declare a vector-add tensor intrinsic over (n, 1, 1) tensors.

    The two trailing unit dimensions match the rank of the 3-D compute
    stage being tensorized; symbolic strides plus offset_factor=1 relax
    the buffer-layout checks that otherwise fail after reorder().
    """
    x = tvm.placeholder((n, 1, 1), name='vx')
    y = tvm.placeholder((n, 1, 1), name='vy')
    z = tvm.compute(x.shape, lambda i, j, k: x[i, j, k] + y[i, j, k], name='z')

    def intrin_func(ins, outs):
        # Lower the intrinsic body to a packed call to an external "vadd" kernel.
        xx, yy = ins
        zz = outs[0]
        return tvm.call_packed("vadd", xx, yy, zz)

    # Symbolic strides for the two outer dimensions let the intrinsic
    # accept non-compact input regions; the innermost stride is 1.
    strides = [tvm.var('so'), tvm.var('si'), 1]
    offset_factor = 1
    xb = tvm.decl_buffer(x.shape, x.dtype,
                         name="xb",
                         offset_factor=offset_factor,
                         strides=strides)
    yb = tvm.decl_buffer(y.shape, y.dtype,
                         name="yb",
                         offset_factor=offset_factor,
                         strides=strides)
    zb = tvm.decl_buffer(z.shape, z.dtype,
                         name="zb",
                         offset_factor=offset_factor,
                         strides=strides)
    binds = {x: xb, y: yb, z: zb}
    return tvm.decl_tensor_intrin(z.op, intrin_func, binds=binds)


def test_tensorize_vadd():
    """Tensorize the axis moved innermost by reorder(), using the 3-D intrinsic."""
    m = 16
    n = 16
    l = 16
    x = tvm.placeholder((m, n, l), name='x')
    y = tvm.placeholder((m, n, l), name='y')
    z = tvm.compute(x.shape, lambda i, j, k: x[i, j, k] + y[i, j, k], name='z')

    def check(factor):
        s = tvm.create_schedule(z.op)
        xa, xb, xc = s[z].op.axis
        # Move xa (originally outermost) to the innermost position.
        s[z].reorder(xb, xc, xa)
        print(tvm.lower(s, [x, y, z], simple_mode=True))
        vadd = intrin_vadd(factor)
        s[z].tensorize(xa, vadd)
        s = s.normalize()
        print(tvm.lower(s, [x, y, z], simple_mode=True))

    check(16)


test_tensorize_vadd()

After the reorder, xa becomes the innermost axis. Why should we tensorize it with a tensor that has 3 dims? It's just one loop. And if I change the intrinsic tensor to 1 dim, an error will occur.

I guess tensorize still uses the axis order from before the reorder; that's why we need to add two ones to the shape to make the axis positions line up. And this is also why the strides conflict with the shape. Am I right?