Hello,
Similar to the thread "TVM access beyond array boundary", I was wondering why TVM does not do any checking for out-of-bounds array accesses at the TIR level. Consider the following example:
import tvm
from tvm import te
# Minimal reproduction: a direct 2-D convolution written with TVM's tensor
# expression API whose input is NOT padded, so the lowered TIR reads past the
# end of `data`.

# Problem sizes.
batch_size = 1
height = 14
width = 14
in_channels = 256
out_channels = 256
kernel_h = 3
kernel_w = 3
# NOTE: pad_h / pad_w are declared but never applied to `data` in this script.
pad_h = 1
pad_w = 1
stride_h = 1
stride_w = 1

# Tensor shapes; both tuples are ordered exactly as they are indexed in the
# compute below.
data_shape = (batch_size, in_channels, height, width)
kernel_shape = (out_channels, in_channels, kernel_h, kernel_w)

# Deliberate mistake: the output keeps the full 14x14 spatial extent although
# the input was never padded accordingly.
fout_height = 14
fout_width = 14
output_shape = (batch_size, out_channels, fout_height, fout_width)

# Convolution reduction axes
dh = te.reduce_axis((0, kernel_h), name="dh")
dw = te.reduce_axis((0, kernel_w), name="dw")
ic = te.reduce_axis((0, in_channels), name="ic")

# Input placeholder tensors
data = te.placeholder(data_shape, name="data", dtype="int8")
kernel = te.placeholder(kernel_shape, name="kernel", dtype="int8")


def _conv_at(n, c, h, w):
    """Return the reduction expression for output element (n, c, h, w)."""
    return te.sum(
        data[n, ic, h * stride_h + dh, w * stride_w + dw] * kernel[c, ic, dh, dw],
        axis=[ic, dh, dw],
    )


res_conv = te.compute(output_shape, _conv_at, name="res_conv")

# Default schedule, lowered to TIR and printed for inspection.
schedule = te.create_schedule(res_conv.op)
lowered = tvm.lower(schedule, [data, kernel, res_conv], simple_mode=True)
print(lowered)
This outputs:
primfn(data_1: handle, kernel_1: handle, res_conv_1: handle) -> ()
attr = {"tir.noalias": True, "global_symbol": "main"}
buffers = {kernel: Buffer(kernel_2: handle, int8, [256, 256, 3, 3], []),
res_conv: Buffer(res_conv_2: handle, int8, [1, 256, 14, 14], []),
data: Buffer(data_2: handle, int8, [1, 256, 14, 14], [])}
buffer_map = {res_conv_1: res_conv, data_1: data, kernel_1: kernel} {
for (c: int32, 0, 256) {
for (h: int32, 0, 14) {
for (w: int32, 0, 14) {
res_conv_2[(((c*196) + (h*14)) + w)] = 0i8
for (ic: int32, 0, 256) {
for (dh: int32, 0, 3) {
for (dw: int32, 0, 3) {
res_conv_2[(((c*196) + (h*14)) + w)] = ((int8*)res_conv_2[(((c*196) + (h*14)) + w)]) + ((int8*)data_2[(((((ic*196) + (h*14)) + (dh*14)) + w) + dw)])*(int8*)kernel_2[((((c*2304) + (ic*9)) + (dh*3)) + dw)])))
}
}
}
}
}
}
}
Here the access data_2[(((((ic*196) + (h*14)) + (dh*14)) + w) + dw)]
goes out of bounds (e.g. for h = 13 and dh = 2 the row index h + dh is 15, but data only has 14 rows).
Sure, the error is that the data
has not been padded accordingly, which is needed for the tensor size computation to be correct.
Is there a non-standard optimization pass that deals with this, or do we need to "burden" the programmer with deriving the correct tensor sizes and, in the worst case, going out of bounds?