Okay, my workaround so far produces the following IR. The annotation.quantize_info op
carries the attributes ndom_scale, nclip_min, and nclip_max:
/* Relay IR printed after the workaround: @main takes a (1, 8) float32 tensor and returns a (1, 8) float32 tensor. */
def @main(%X: Tensor[(1, 8), float32]) -> Tensor[(1, 8), float32] {
/* Input side: multiply by a scalar, round, clip to [-127, 127], then cast to int8. */
%0 = multiply(%X, 8429.23f /* ty=float32 */) /* ty=Tensor[(1, 8), float32] */;
%1 = round(%0) /* ty=Tensor[(1, 8), float32] */;
%2 = clip(%1, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 8), float32] */;
%3 = cast(%2, dtype="int8") /* ty=Tensor[(1, 8), int8] */;
/* nn.dense of the int8 input against a constant (8, 8) int8 tensor, with int8 output. */
%4 = nn.dense(%3, meta[relay.Constant][0] /* ty=Tensor[(8, 8), int8] */ /* ty=Tensor[(8, 8), int8] */, units=None, out_dtype="int8") /* ty=Tensor[(1, 8), int8] */;
/* The dense result is wrapped by on_device, then annotation.quantize_info, then on_device again. */
%5 = on_device(%4, meta[relay.attrs.OnDeviceAttrs][0]) /* ty=Tensor[(1, 8), int8] */;
%6 = annotation.quantize_info(%5, meta[relay.attrs.QuantizeInfoAttrs][0]) /* ty=Tensor[(1, 8), int8] */;
%7 = on_device(%6, meta[relay.attrs.OnDeviceAttrs][1]) /* ty=Tensor[(1, 8), int8] */;
/* Output side: cast back to float32 and multiply by a scalar. */
%8 = cast(%7, dtype="float32") /* ty=Tensor[(1, 8), float32] */;
multiply(%8, 4.63417e-07f /* ty=float32 */) /* ty=Tensor[(1, 8), float32] */
}
My op definition:
// relay.annotation.quantize_info
//
// Registers the QuantizeInfoAttrs node type and then the
// "annotation.quantize_info" operator, which takes exactly one input.
TVM_REGISTER_NODE_TYPE(QuantizeInfoAttrs);

RELAY_REGISTER_OP("annotation.quantize_info")
    .describe(R"code(Annotate an expression with it's quantization info)code" TVM_ADD_FILELINE)
    .set_num_inputs(1)
    .add_argument("data", "Tensor", "The input data.")
    // Type relation registered under the name "Identity".
    .add_type_rel("Identity", IdentityRel)
    .set_support_level(10)
    // Declared as an element-wise, stateless op with the element-wise
    // arbitrary-layout inference function.
    .set_attr<TOpPattern>("TOpPattern", kElemWise)
    .set_attr<TOpIsStateful>("TOpIsStateful", false)
    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
    // Compute rule: forward the single input through topi::identity.
    // NOTE(review): op_attrs is never read here, so no QuantizeInfoAttrs value
    // is passed along to the tensor produced by this compute function.
    .set_attr<FTVMCompute>(
        "FTVMCompute",
        [](const Attrs& op_attrs, const Array<Tensor>& args, const Type& result_type,
           const Target& tgt) -> Array<Tensor> {
          return {topi::identity(args[0])};
        });
/*!
 * \brief Build a call to "annotation.quantize_info" around an expression.
 * \param data The expression passed as the call's single argument.
 * \param ndom_scale Value stored in QuantizeInfoAttrs::ndom_scale.
 * \param nclip_min Value stored in QuantizeInfoAttrs::nclip_min.
 * \param nclip_max Value stored in QuantizeInfoAttrs::nclip_max.
 * \return The call node, carrying the populated QuantizeInfoAttrs.
 */
Expr QuantizeInfo(Expr data, double ndom_scale, int nclip_min, int nclip_max) {
  // The operator is looked up once and reused on later calls.
  static const Op& quantize_info_op = Op::Get("annotation.quantize_info");

  auto info = make_node<QuantizeInfoAttrs>();
  info->ndom_scale = ndom_scale;
  info->nclip_min = nclip_min;
  info->nclip_max = nclip_max;

  return CallNode::make(quantize_info_op, {data}, Attrs(info), {});
}
// Registers QuantizeInfo under the global name
// "relay.op.annotation._make.quantize_info"; the arguments are forwarded unchanged.
TVM_REGISTER_API("relay.op.annotation._make.quantize_info")
    .set_body_typed<Expr(Expr, double, int, int)>(
        [](Expr expr, double scale, int clip_lo, int clip_hi) {
          return QuantizeInfo(expr, scale, clip_lo, clip_hi);
        });
The op gets fused with nn.dense, but its attributes appear empty.
Edit: I was able to resolve this by attaching the attributes to the topi::identity call. I can now access the values as expected in my backend.