@masahi thanks. I can move further now, but I am stuck on one error. I suspect it is related to an unsuccessful tensorize, but I have no clue how to proceed.
> def conv_intrin (args):
> batch = args[0]
> ic = args[1]
> ih = args[2]
> iw = args[3]
> oc = args[4]
> kh = args[5]
> kw = args[6]
> pad = args[7]
> stride = args[8]
>
> d = tvm.placeholder((ic, ih, iw), name='data')
> w = tvm.placeholder((ic, kh, kw), name='weight')
> oh = (ih - kh + 2*pad) // stride + 1
> ow = (iw - kw + 2*pad) // stride + 1
>
> # Create reduction variables
> rc = tvm.reduce_axis((0, ic), name='rc')
> ry = tvm.reduce_axis((0, kh), name='ry')
> rx = tvm.reduce_axis((0, kw), name='rx')
>
> # Compute the convolution
> c = tvm.compute((oh, ow), lambda yy, xx: tvm.sum(
> d[rc, yy * stride + ry, xx * stride + rx] * w[rc, ry, rx],
> axis=[rc, ry, rx]), name='c')
>
> def conv_intrin_lower (ins, outs):
> ib = tvm.ir_builder.create()
> dbuf, wbuf = ins
> cbuf = outs[0]
> ib.emit(tvm.call_extern('float32', 'conv_bulldog',
> cbuf.access_ptr('w'),
> dbuf.access_ptr('r'),
> wbuf.access_ptr('r'),
> args))
> return ib.get()
>
> return tvm.decl_tensor_intrin(c.op, conv_intrin_lower)
>
> def conv_intrin_schedule (attrs, outs, target):
> with tvm.target.create(target):
> s = tvm.create_schedule([x.op for x in outs])
> scheduled_ops = []
>
> def traverse(op):
> print("OP tag: ", op.tag)
> if 'broadcast' in op.tag:
> if op not in s.outputs:
> s[op].compute_inline()
> for tensor in op.input_tensors:
> if tensor.op.input_tensors and tensor.op not in scheduled_ops:
> traverse(tensor.op)
>
> if 'conv2d_nchw' in op.tag:
> output = op.output(0)
> conv_out = op.input_tensors[0]
> data_vec = conv_out.op.input_tensors[0]
> data = data_vec.op.input_tensors[0]
> data_pad = None
> if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
> data_pad = data
> data = data_pad.op.input_tensors[0]
>
> batch, ic, ih, iw = data.shape
> _, oc, _, _ = output.shape
> kh, kw = attrs.get_int_tuple("kernel_size")
> stride, _ = attrs.get_int_tuple("strides")
> pad, _ = attrs.get_int_tuple("padding")
> args = (batch, ic, ih, iw, oc, kh, kw, pad, stride)
> print("args: ", args)
> ti = conv_intrin(args)
> axis = conv_out.op.axis[0]
> s[x].tensorize(axis, ti)
> scheduled_ops.append(op)
>
> traverse(outs[0].op)
> print("scheduled ops: ", scheduled_ops)
> return s
>
> data = nnvm.symbol.Variable(name='data')
> kernel = nnvm.symbol.Variable(name='kernel')
> conv = nnvm.symbol.conv2d(data=data, weight=kernel, kernel_size=(3,3), channels=1, strides=[1,1])
> graph = nnvm.graph.create(conv)
> shape = {'data': (1, 3, 12, 12), 'kernel': (1, 3, 3, 3)}
>
> nnvm.top.registry.register_schedule('conv2d', conv_intrin_schedule, level=11)
> nnvm.top.registry.register_pattern('conv2d', nnvm.top.registry.OpPattern.OPAQUE, level=11)
>
> dg, mhost2, params = nnvm.compiler.build(graph, target='llvm', shape=shape, dtype='float32')
> print(dg.ir())
> print(mhost2.get_source())
> mhost2.export_library('./conv2_graph.so')
>
> DEBUG:autotvm:Finish loading 35 records
> WARNING:autotvm:Cannot find config for target=llvm, workload=('conv2d', (1, 3, 12, 12, 'float32'), (1, 3, 3, 3, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
>
> ('OP tag: ', 'broadcast')
> ('OP tag: ', 'conv2d_nchw')
> ('args: ', (1, 3, 12, 12, 1, 3, 3, 0, 1))
>
> ---------------------------------------------------------------------------
> NNVMError Traceback (most recent call last)
> <ipython-input-5-1df80d347f1e> in <module>()
> 2 nnvm.top.registry.register_pattern('conv2d', nnvm.top.registry.OpPattern.OPAQUE, level=11)
> 3
> ----> 4 dg, mhost2, params = nnvm.compiler.build(graph, target='llvm', shape=shape, dtype='float32')
> 5 print(dg.ir())
> 6 print(mhost2.get_source())
>
> /home/sxie/projects/tvm/nnvm/python/nnvm/compiler/build_module.pyc in build(graph, target, shape, dtype, params, target_host, layout)
> 303 graph = graph.apply("GraphFuse")
> 304 with target:
> --> 305 graph = graph.apply("GraphCompile")
> 306 libmod = graph_attr._move_out_module(graph, "module")
> 307 # Write variable initial values into params
>
> /home/sxie/projects/tvm/nnvm/python/nnvm/graph.pyc in apply(self, passes)
> 232 ghandle = GraphHandle()
> 233 npass = nn_uint(len(passes))
> --> 234 check_call(_LIB.NNGraphApplyPasses(self.handle, npass, cpass, ctypes.byref(ghandle)))
> 235 return Graph(ghandle)
> 236
>
> /home/sxie/projects/tvm/nnvm/python/nnvm/_base.pyc in check_call(ret)
> 73 """
> 74 if ret != 0:
> ---> 75 raise NNVMError(py_str(_LIB.NNGetLastError()))
> 76
> 77 def c_str(string):
>
> NNVMError: TVMCall CFunc Error:
> Traceback (most recent call last):
> File "/home/sxie/projects/tvm/python/tvm/_ffi/_ctypes/function.py", line 55, in cfun
> rv = local_pyfunc(*pyargs)
> File "<ipython-input-3-20d6fcc639c9>", line 37, in conv_intrin_schedule
> traverse(outs[0].op)
> File "<ipython-input-3-20d6fcc639c9>", line 13, in traverse
> traverse(tensor.op)
> File "<ipython-input-3-20d6fcc639c9>", line 32, in traverse
> ti = conv_intrin(args)
> File "<ipython-input-2-2530259ff3f5>", line 39, in conv_intrin
> return tvm.decl_tensor_intrin(c.op, conv_intrin_lower)
> File "/home/sxie/projects/tvm/python/tvm/tensor_intrin.py", line 109, in decl_tensor_intrin
> body = fcompute(binds_list[:len(inputs)], binds_list[len(inputs):])
> File "<ipython-input-2-2530259ff3f5>", line 36, in conv_intrin_lower
> args))
> File "/home/sxie/projects/tvm/python/tvm/intrin.py", line 154, in call_extern
> dtype, func_name, convert(args), _Call.Extern, None, 0)
> File "/home/sxie/projects/tvm/python/tvm/_ffi/_ctypes/function.py", line 185, in __call__
> ctypes.byref(ret_val), ctypes.byref(ret_tcode)))
> File "/home/sxie/projects/tvm/python/tvm/_ffi/base.py", line 72, in check_call
> raise TVMError(py_str(_LIB.TVMGetLastError()))
> TVMError: [12:29:51] /home/sxie/projects/tvm/include/tvm/packed_func_ext.h:123: Check failed: NodeTypeChecker<TNodeRef>::Check(sptr.get()) Expected type array<Expr> but get Array
>
> Stack trace returned 10 entries:
> [bt] (0) /home/sxie/projects/tvm/build/libtvm.so(dmlc::StackTrace[abi:cxx11](unsigned long)+0x1f5) [0x7fad54668465]
> [bt] (1) /home/sxie/projects/tvm/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x3e) [0x7fad546690ee]
> [bt] (2) /home/sxie/projects/tvm/build/libtvm.so(tvm::Array<HalideIR::Expr, void> tvm::runtime::TVMArgValue::AsNodeRef<tvm::Array<HalideIR::Expr, void> >() const+0x33c) [0x7fad5469084c]
> [bt] (3) /home/sxie/projects/tvm/build/libtvm.so(+0x2cefc7) [0x7fad54687fc7]
> [bt] (4) /home/sxie/projects/tvm/build/libtvm.so(TVMFuncCall+0x5e) [0x7fad54cd707e]
> [bt] (5) /home/sxie/projects/python2/anaconda2/envs/ml-suite/lib/python2.7/lib-dynload/../../libffi.so.6(ffi_call_unix64+0x4c) [0x7fad744bcec0]
> [bt] (6) /home/sxie/projects/python2/anaconda2/envs/ml-suite/lib/python2.7/lib-dynload/../../libffi.so.6(ffi_call+0x22d) [0x7fad744bc87d]
> [bt] (7) /home/sxie/projects/python2/anaconda2/envs/ml-suite/lib/python2.7/lib-dynload/_ctypes.so(_ctypes_callproc+0x4de) [0x7fad746d38de]
> [bt] (8) /home/sxie/projects/python2/anaconda2/envs/ml-suite/lib/python2.7/lib-dynload/_ctypes.so(+0x9b31) [0x7fad746c9b31]
> [bt] (9) /home/sxie/projects/python2/anaconda2/envs/ml-suite/bin/../lib/libpython2.7.so.1.0(PyObject_Call+0x43) [0x7fad76816973]