Issue in depthwise convolution

NNVM fails to convert the nn.separable_conv2d operator with the error "input channels must divide group size". The error is raised by Conv2DInferShape for the following inputs:

Input [1, 32, 32, 3]
Depthwise weights [7, 7, 3, 8]
Pointwise weights [1, 1, 24, 64]
data_format 'NHWC'
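
For context, the expected shape flow (a minimal sketch in plain TensorFlow 1.x, independent of NNVM): the depthwise stage expands the 3 input channels by the depth multiplier 8 to 3 * 8 = 24 channels, and the 1x1 pointwise stage maps those 24 channels to 64.

import tensorflow as tf  # TF 1.x, as used by the NNVM frontend

with tf.Graph().as_default():
    data = tf.placeholder(tf.float32, [1, 32, 32, 3])
    dw = tf.ones([7, 7, 3, 8])     # depthwise weights: [H, W, in_channels, depth_mult]
    pw = tf.ones([1, 1, 24, 64])   # pointwise weights: 24 = 3 * 8 input channels -> 64
    out = tf.nn.separable_conv2d(data, dw, pw,
                                 strides=[1, 1, 1, 1], padding='SAME')
    print(out.shape)               # (1, 32, 32, 64)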

There are the following issues:

  1. The NNVM TensorFlow frontend incorrectly sets kernel_layout to 'HWOI'. The depth multiplier (depth_mult) is taken from weights_shape[3], so the kernel_layout should be 'HWIO'. [Function _conv() of tensorflow.py]
  2. For depthwise convolution, 'groups' is set to 'channels' (in_channels * depth_mult). The input has 3 channels while groups is 24, so the (dshape[1] % param.groups) check fails.
  3. The num_filter (channel multiplier) of the weight shape is calculated as ((param.channels / param.groups) * param.groups), which incorrectly yields the number of output channels. It should be (param.channels / dshape[1]). Note that data_layout has already been converted from the original NHWC to NCHW at this point. [Function Conv2DInferShape of convolution.cc] The sketch below works through this arithmetic.
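
A minimal plain-Python sketch of the arithmetic behind issues 2 and 3 (values taken from the inputs above; the variable names are illustrative, not NNVM's):

in_channels = 3                      # dshape[1] after the NHWC -> NCHW conversion
depth_mult = 8                       # weights_shape[3] of the HWIO depthwise kernel
channels = in_channels * depth_mult  # 24; the frontend also uses this as 'groups'
groups = channels

# Issue 2: the divisibility check in Conv2DInferShape cannot hold.
assert in_channels % groups != 0     # 3 % 24 == 3, so the CHECK fails

# Issue 3: the per-group filter count collapses back to the output channels.
num_filter_buggy = (channels // groups) * groups  # 24 -- wrong for depthwise
num_filter_fixed = channels // in_channels        # 8  -- the depth multiplier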

Test case:

#######################################################################
# Separable Convolution
# ---------------------
# Imports as in nnvm's TensorFlow frontend test suite
# (tests/python/frontend/tensorflow/test_forward.py), where the
# compare_tf_with_tvm helper is defined.
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import nn

def _test_sep_convolution(tensor_in_sizes, dwfilter_size, pwfilter_size,
                          dilations, strides, padding, data_format):
    """ One iteration of separable convolution with given shapes and attributes """

    total_size_1 = 1
    total_size_2 = 1
    total_size_3 = 1
    for s in tensor_in_sizes:
        total_size_1 *= s
    for s in dwfilter_size:
        total_size_2 *= s
    for s in pwfilter_size:
        total_size_3 *= s
    # Initialize the input tensors with arrays containing incrementing
    # numbers from 1.
    data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
    dwfilter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
    pwfilter_array = [f * 1.0 for f in range(1, total_size_3 + 1)]

    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype='float32')
        dw_filter = constant_op.constant(dwfilter_array, shape=dwfilter_size, dtype='float32')
        pw_filter = constant_op.constant(pwfilter_array, shape=pwfilter_size, dtype='float32')
        strides = [1] + strides + [1]
        dilations = [1] + dilations + [1]

        nn.separable_conv2d(in_data,
                            dw_filter,
                            pw_filter,
                            strides=strides,
                            padding=padding,
                            data_format=data_format)

        compare_tf_with_tvm(np.reshape(data_array, tensor_in_sizes).astype('float32'),
                            'Placeholder:0', 'separable_conv2d:0')

def test_forward_sep_convolution():
    _test_sep_convolution([1, 32, 32, 3], [7, 7, 3, 8], [1, 1, 24, 64],
                          [1, 1], [1, 1], 'SAME', 'NHWC')

Possible solution:

diff --git a/nnvm/python/nnvm/frontend/tensorflow.py b/nnvm/python/nnvm/frontend/tensorflow.py
index ad7c4fc..b642d45 100644
--- a/nnvm/python/nnvm/frontend/tensorflow.py
+++ b/nnvm/python/nnvm/frontend/tensorflow.py
@@ -208,6 +208,7 @@ def _conv(opname):
 
     if attr['data_format'] == 'NHWC':
         kernel_h, kernel_w, _, depth_mult = weights_shape
+        attr['kernel_layout'] = 'HWIO'
         attr['kernel_shape'] = (weights_shape[0], weights_shape[1])
         if opname == 'conv':
             attr['channels'] = weights_shape[3]

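Why 'HWIO' is correct: TensorFlow stores a depthwise filter as [filter_height, filter_width, in_channels, channel_multiplier], i.e. the channel multiplier sits on the last ('O') axis. A quick numpy sketch using the shapes from this report (illustration only, not NNVM code):

import numpy as np

w_tf = np.zeros((7, 7, 3, 8))        # TF depthwise weights
w_oihw = w_tf.transpose(3, 2, 0, 1)  # HWIO -> OIHW
print(w_oihw.shape)                  # (8, 3, 7, 7): 'O' is depth_mult == 8
# Under the wrong 'HWOI' tag, 'O' would be read from axis 2 and come out as 3.
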
diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py
index 49192ca..4fff430 100644
--- a/nnvm/python/nnvm/top/nn.py
+++ b/nnvm/python/nnvm/top/nn.py
@@ -122,6 +122,12 @@ def compute_conv2d(attrs, inputs, _):
          groups == channels:
         out = topi.nn.depthwise_conv2d_nhwc(
             inputs[0], kernel, strides, padding, out_dtype=out_dtype)
+    elif layout == "NHWC" and \
+         kernel_layout == "HWIO" and \
+         groups == get_const_int(inputs[0].shape[3]) * get_const_int(inputs[1].shape[3]) and \
+         groups == channels:
+        out = topi.nn.depthwise_conv2d_nhwc(
+            inputs[0], kernel, strides, padding, out_dtype=out_dtype)
     else:
         raise ValueError("not support arbitrary group number for now")
@@ -149,7 +155,7 @@ def schedule_conv2d(attrs, outs, target):
         return topi.generic.schedule_conv2d_nhwc(outs)
     elif groups == channels and layout == "NCHW":
         return topi.generic.schedule_depthwise_conv2d_nchw(outs)
-    elif groups == channels and layout == "NHWC" and kernel_layout == "HWOI":
+    elif groups == channels and layout == "NHWC":
         return topi.generic.schedule_depthwise_conv2d_nhwc(outs)
     else:
         raise ValueError("No compatible schedule")
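The nn.py change mirrors the frontend fix: compute_conv2d gains a depthwise branch keyed on the 'HWIO' kernel layout that the patched frontend now emits, and schedule_conv2d drops its 'HWOI' condition so any NHWC depthwise convolution is dispatched to the depthwise NHWC schedule.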

diff --git a/nnvm/src/top/nn/convolution.cc b/nnvm/src/top/nn/convolution.cc
index 22bda04..852dcf2 100644
--- a/nnvm/src/top/nn/convolution.cc
+++ b/nnvm/src/top/nn/convolution.cc
@@ -33,6 +33,7 @@ inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
                              std::vector<TShape>* out_shape) {
   static const Layout kNCHW("NCHW");
   static const Layout kOIHW("OIHW");
+  bool depthwise = false;
 
   const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
@@ -51,6 +52,9 @@ inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
       << "Conv only support output layouts that are convertible from NCHW."
       << " But got " << out_layout;
 
+  if (param.groups != 1 && param.groups == param.channels)
+    depthwise = true;
+
   if (param.use_bias) {
     CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
   } else {
@@ -68,8 +72,11 @@ inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
         << "incorrect stride size: " << param.strides;
     CHECK_EQ(param.dilation.ndim(), 2U)
         << "incorrect dilate size: " << param.dilation;
-    CHECK_EQ(dshape[1] % param.groups, 0U)
-        << "input channels must divide group size";
+    if (depthwise == false)
+      CHECK_EQ(dshape[1] % param.groups, 0U)
+          << "input channels must divide group size";
     CHECK_EQ(param.channels % param.groups, 0U)
         << "output channels must divide group size";
@@ -78,9 +85,18 @@ inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
                    param.kernel_size[0],
                    param.kernel_size[1]});
 
+    if (depthwise == true) {
+      TShape dw_shape({param.channels / dshape[1],
+                       dshape[1],
+                       param.kernel_size[0],
+                       param.kernel_size[1]});
+      wshape = dw_shape;
+    }
     wshape = ConvertLayout(wshape, kOIHW, kernel_layout);
-    wshape[kernel_layout.indexof('O')] *= param.groups;
+    if (depthwise == false)
+      wshape[kernel_layout.indexof('O')] *= param.groups;
 
     NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kWeight, wshape);
     if (param.use_bias) {
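
With these changes the inferred depthwise weight shape round-trips back to the TensorFlow layout. A small plain-Python check of the patched logic (values from this report; illustration only, not NNVM code):

channels, in_channels, kh, kw = 24, 3, 7, 7  # param.channels, dshape[1], kernel_size

wshape_oihw = (channels // in_channels, in_channels, kh, kw)  # (8, 3, 7, 7)
o, i, h, w = wshape_oihw
wshape_hwio = (h, w, i, o)               # ConvertLayout(wshape, kOIHW, 'HWIO')
assert wshape_hwio == (7, 7, 3, 8)       # matches the TF depthwise weights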