Hey, I am working on a simple box blur, mainly to write an introductory blog post on TVM. It has four stages: padding, x-blur, y-blur, and a final cast back to the uint8 datatype.
The y-blur accesses the x-blur result at 3 different y increments, so I would like to lay out x_blur such that Y is the most rapidly changing dimension. I have been successful at this by changing the actual algorithm definition, but I would like to do this during scheduling instead.
I am trying to use bind_buffer to control the strides of x_blur, but binding x_blur to any decl_buffer object causes TVM to think that I want to pass x_blur in to the function, which is definitely not what I want:
chrisn@chrisn-lt-2:~/dev/tvm-clj/python/questions$ cat bind_buffer.py
import tvm
def print_schedule(sched, arglist):
    """Lower ``sched`` with ``simple_mode=True`` and print the result.

    ``arglist`` is forwarded unchanged to ``tvm.lower`` as its argument list.
    """
    lowered = tvm.lower(sched, arglist, simple_mode=True)
    print(lowered)
# Symbolic image dimensions.
rows = tvm.var("rows")
cols = tvm.var("cols")
chans = tvm.var("chans")

input_vec = tvm.placeholder((rows, cols, chans), dtype="float32", name="input")


def clamp(v, v_min, v_max):
    """Bound ``v`` above by ``v_max``, then below by ``v_min``."""
    return tvm.max(tvm.min(v, v_max), v_min)


## clamp to edge padding
# One extra row/column on every side; out-of-range coordinates are clamped
# back onto the input, and the value is cast to uint16.
padded = tvm.compute(
    (rows + 2, cols + 2, chans),
    lambda y, x, c: input_vec[
        clamp(y - 1, 0, rows - 1),
        clamp(x - 1, 0, cols - 1),
        c,
    ].astype("uint16"),
    name="padded",
)

# Horizontal pass: mean of three x-adjacent padded values.
x_blur = tvm.compute(
    (rows + 2, cols, chans),
    lambda y, x, c: (
        padded[y, x, c] + padded[y, x + 1, c] + padded[y, x + 2, c]
    ) / 3,
    name="x_blur",
)

# Vertical pass: mean of three y-adjacent x_blur values.
y_blur = tvm.compute(
    (rows, cols, chans),
    lambda y, x, c: (
        x_blur[y, x, c] + x_blur[y + 1, x, c] + x_blur[y + 2, x, c]
    ) / 3,
    name="y_blur",
)

# Final stage: cast the blurred value to uint8.
box_blur = tvm.compute(
    (rows, cols, chans),
    lambda y, x, c: y_blur[y, x, c].astype("uint8"),
    name="box_blur",
)
# Tensors handed to tvm.lower / tvm.build as the argument list.
arglist = [input_vec, box_blur]
schedule = tvm.create_schedule(box_blur.op)

# Inline the padding and y-blur stages.
for inlined in (padded.op, y_blur):
    schedule[inlined].compute_inline()

# Compute x_blur at box_blur's second axis (the x loop).
box_blur_stage = schedule[box_blur]
schedule[x_blur].compute_at(box_blur_stage, box_blur.op.axis[1])

print_schedule(schedule, arglist)
# Requested x_blur layout: y varies fastest, then channel, then x.
x_blur_y_stride = 1
x_blur_c_stride = rows + 2
# One step in x must skip a full (rows + 2) column for every channel, so
# scale by the symbolic channel count instead of assuming 3 channels.
x_blur_x_stride = x_blur_c_stride * chans

# NOTE(review): binding an intermediate tensor through `binds` is what the
# reported MakeAPI error complains about ('x_blur' is not in api_args);
# this block only corrects the stride arithmetic, not that failure.
fun = tvm.build(
    schedule,
    arglist,
    "llvm",
    name="box_blur",
    binds={
        x_blur: tvm.decl_buffer(
            x_blur.shape,
            name="x_blur",
            scope="local",
            dtype=x_blur.dtype,
            # Stride order follows the shape order (y, x, c).
            strides=[x_blur_y_stride, x_blur_x_stride, x_blur_c_stride],
        )
    },
)
chrisn@chrisn-lt-2:~/dev/tvm-clj/python/questions$ python3 bind_buffer.py
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Cast
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Load
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Load
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Load
// attr [x_blur] storage_scope = "global"
allocate x_blur[int32 * 3 * 1 * chans]
produce box_blur {
for (y, 0, rows) {
for (x, 0, cols) {
produce x_blur {
for (y, 0, 3) {
for (c, 0, chans) {
x_blur[((y*chans) + c)] = (int32(((uint16(input[(((max((min(x, cols) + -1), 0) + (max((min((y + y), rows) + -1), 0)*cols))*chans) + c)]) + uint16(input[(((max(min(x, (cols + -1)), 0) + (max((min((y + y), rows) + -1), 0)*cols))*chans) + c)])) + uint16(input[(((max(min((x + 1), (cols + -1)), 0) + (max((min((y + y), rows) + -1), 0)*cols))*chans) + c)])))/3)
}
}
}
for (c, 0, chans) {
box_blur[((((y*cols) + x)*chans) + c)] = uint8((((x_blur[c] + x_blur[(chans + c)]) + x_blur[((chans*2) + c)])/3))
}
}
}
}
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Cast
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Load
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Load
[14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/arithmetic/int_set.cc:514: cannot evaluate set type Load
Traceback (most recent call last):
File "bind_buffer.py", line 58, in <module>
x_blur_c_stride])})
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/build_module.py", line 445, in build
binds=binds)
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/build_module.py", line 380, in lower
return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func)
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/_ffi/_ctypes/function.py", line 185, in __call__
ctypes.byref(ret_val), ctypes.byref(ret_tcode)))
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/_ffi/base.py", line 66, in check_call
raise TVMError(py_str(_LIB.TVMGetLastError()))
tvm._ffi.base.TVMError: [14:14:18] /home/chrisn/dev/tvm-clj/tvm/src/pass/make_api.cc:169: Not all Vars are passed in api_args: 'x_blur' does not appeared in api_args
Is there a way, at scheduling time, to dictate the layout of intermediate buffers (the ones that TVM allocates and deallocates itself during execution)?