I'm not sure if it is the same issue. As a quick check, could you add the workaround back into your local copy of the file “topi/python/topi/cuda/conv2d_transpose_nchw.py” to see whether it is still needed in your case? The relevant code is shown below:
def schedule_conv2d_transpose_nchw_cuda(cfg, outs):
"""TOPI Schedule callback for conv2d transpose operator.
... ...
def _callback(op):
if op.tag == 'conv2d_transpose_nchw':
... ...
if cfg.is_fallback:
ko = int(kernel.shape[1])
kh = int(kernel.shape[2])
kw = int(kernel.shape[3])
stride_h, stride_w = cfg.stride
# Workaround to make CUDA compilation work. Issue #4470
# TODO make _fallback_schedule work for all kernel/strides combinations
# after issue #4470 is resolved
do_fallback = True
if ko == 1:
do_fallback = False
elif (kh, kw) == (1, 1):
do_fallback = True
elif (stride_h, stride_w) == (2, 2):
do_fallback = False
elif (kh, kw) == (stride_h, stride_w):
do_fallback = False
if do_fallback:
N, F, Y, X = get_const_tuple(conv.shape)
_fallback_schedule(N, F, Y, X)