Below is the test operator that reproduces the issue:
/*!
 * \brief Minimal operator used to reproduce the code-generation issue.
 *
 * Builds a 1-D compute of bs*h*w*output_chans elements and returns it
 * reshaped to {bs, h, w, output_chans}.
 *
 * \param input  Tensor whose shape[3] is read as input_chans; its dtype is the
 *               cast target for all three flattened tensors.
 * \param input1 Tensor whose shape[0], shape[3] and shape[4] are read as bs, gd
 *               and coeffs_chans.
 * \param input2 Tensor whose shape[1] and shape[2] are read as h and w.
 * \param name   Name forwarded to compute().
 * \param tag    Tag forwarded to compute().
 * \return The computed tensor reshaped to {bs, h, w, output_chans}.
 *
 * NOTE(review): every dimension is read through GetConstInt, so the shapes are
 * presumably required to be compile-time constants -- confirm.
 */
inline tvm::Tensor test_operator(const tvm::Tensor& input,
const tvm::Tensor& input1,
const tvm::Tensor& input2,
std::string name = "test_operator",
std::string tag = kInjective) {
// Static dimensions pulled from the three input shapes.
int h = topi::detail::GetConstInt(input2->shape[1]);
int w = topi::detail::GetConstInt(input2->shape[2]);
int bs = topi::detail::GetConstInt(input1->shape[0]);
int gd = topi::detail::GetConstInt(input1->shape[3]);
int coeffs_chans = topi::detail::GetConstInt(input1->shape[4]);
int input_chans = topi::detail::GetConstInt(input->shape[3]);
// Integer division: any remainder of coeffs_chans / (input_chans + 1) is discarded.
int output_chans = coeffs_chans / (input_chans+1);
// Axis list holding the single constant 0; handed to every squeeze() call below.
tvm::Array<tvm::Expr> axes;
axes.push_back(tvm::make_const(tvm::Int(32), 0));
// Each input is flattened, squeezed on axis 0, then cast to input's dtype
// (input1 and input2 are cast to input->dtype as well, not to their own dtype).
Tensor input_ft = cast(squeeze(topi::nn::flatten(input), axes), input->dtype);
Tensor input1_ft = cast(squeeze(topi::nn::flatten(input1), axes), input->dtype);
Tensor input2_ft = cast(squeeze(topi::nn::flatten(input2), axes), input->dtype);
int total_count = bs*h*w*output_chans;
// Per-element expression builder. Captures locals by reference; it is handed to
// compute() further down in this same function.
auto test_operator_kernel = [&](const Array<Var>& indices) {
auto idx = indices[0];
// x = (idx / output_chans) % w, with the quotient cast to Int(32) first.
auto x = tvm::cast(Int(32), (idx / output_chans)) % w;
// Value read from input2_ft at x, scaled by the integer gd.
auto gz = input2_ft[x]*gd;
// Accumulator starts as the float constant 0.0f; one load from input1_ft is
// added, indexed by gz cast to Int(32).
// NOTE(review): in the generated OpenCL listing, the statement following
// `int _3 = (int)_2;` has an empty right-hand side; that presumably corresponds
// to this load -- confirm.
auto coeff_sample = Expr(0.0f);
coeff_sample += input1_ft[tvm::cast(Int(32), gz)];
// The multiplier is read at the fixed index input_chans; it does not depend on idx.
return coeff_sample*input_ft[input_chans];
};
// 1-D output of total_count elements, reshaped to 4-D on return.
Array<Expr> out_shape;
out_shape.push_back(total_count);
Tensor output = compute(out_shape, test_operator_kernel, name, tag);
Array <Expr> intermediate_shape = {bs, h, w, output_chans};
return reshape(output, intermediate_shape);
}
Generated OpenCL output:
------opencl code------
// Generated kernel, reproduced verbatim (including the invalid statement below).
// Three nested loops with bounds 1200, 1600 and 3 write one float per iteration
// to tensor[(ax1 * 1600 + ax2) * 3 + ax3].
// In the body as shown, S is never read and _3 is computed but never used.
__kernel void myadd_kernel0(__global float* restrict tensor, __global float* restrict S, __global float* restrict G, __global float* restrict A) {
for (int ax1 = 0; ax1 < 1200; ++ax1) {
for (int ax2 = 0; ax2 < 1600; ++ax2) {
for (int ax3 = 0; ax3 < 3; ++ax3) {
// G is indexed by ax2 only; the value is scaled by 8.0f and truncated to int.
float _1 = G[ax2];
float _2 = _1 * 8.000000e+00f;
int _3 = (int)_2;
// The next statement is the reported defect: the initializer of _4 is missing,
// so the kernel is not valid OpenCL C. The trailing text on that line is the
// reporter's annotation, not generator output.
float _4 = ; –> Here the line with empty RHS generated
// Constant-index read from A, multiplied by the (missing) _4 value.
float _5 = A[3];
float _6 = _4 * _5;
// Linear output offset: (ax1 * 1600 + ax2) * 3 + ax3.
int _7 = ax1 * 1600;
int _8 = _7 + ax2;
int _9 = _8 * 3;
int _10 = _9 + ax3;
tensor[_10] = _6;
}
}
}
}