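Dear all, I'm a bit confused about how TVM folds operations, and I hope someone can help me. After running the passes shown in the code below, my graph ends up with two consecutive add operations that each have one constant operand. Is there any way in TVM to fold these two adds into a single add? Thanks a lot! The original IR is: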
%0 = nn.conv2d(%data, %conv1_1_weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 224, 224), float32] */;
%1 = nn.bias_add(%0, %conv1_1_bias) /* ty=Tensor[(1, 64, 224, 224), float32] */;
%2 = nn.batch_norm(%1, %bn1_1_gamma, %bn1_1_beta, %bn1_1_moving_mean, %bn1_1_moving_var) /* ty=(Tensor[(1, 64, 224, 224), float32], Tensor[(64), float32], Tensor[(64), float32]) */;
%3 = %2.0;
%4 = nn.relu(%3) /* ty=Tensor[(1, 64, 224, 224), float32] */;
After running CanonicalizeOps(), SimplifyInference() and FoldScaleAxis() (see the code below), the IR looks like:
%0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(64, 3, 3, 3), float32] */ /* ty=Tensor[(64, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 224, 224), float32] */;
%1 = add(%0, meta[relay.Constant][1] /* ty=Tensor[(64, 1, 1), float32] */ /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 224, 224), float32] */;
%2 = add(%1, meta[relay.Constant][2] /* ty=Tensor[(64, 1, 1), float32] */ /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 224, 224), float32] */;
%3 = nn.relu(%2) /* ty=Tensor[(1, 64, 224, 224), float32] */;
Code:
import tvm
from tvm import relay
from tvm.relay import transform
import tvm.relay.testing
from tvm.relay.build_module import bind_params_by_name
def fold_optimize(mod, params=None):
    """Run CanonicalizeOps, SimplifyInference and FoldScaleAxis over ``mod``.

    Args:
        mod: Module whose ``"main"`` function is optimized.
        params: Optional parameters. When truthy, they are bound onto
            ``mod["main"]`` with ``bind_params_by_name`` before the passes
            run. Note that this assigns into the module that was passed in.

    Returns:
        The module returned by applying the pass pipeline to ``mod``.
    """
    # The passes are applied in exactly this order.
    passes = [
        relay.transform.CanonicalizeOps(),
        relay.transform.SimplifyInference(),
        relay.transform.FoldScaleAxis(),
    ]
    pipeline = tvm.transform.Sequential(passes)

    if params:
        # NOTE(review): presumably binding turns the named weights into
        # constants so the passes can fold them -- confirm against TVM docs.
        mod["main"] = bind_params_by_name(mod["main"], params)

    return pipeline(mod)
if __name__ == "__main__":
    # Fetch the VGG test workload (first argument 1, batch_norm enabled).
    mod, params = relay.testing.vgg.get_workload(1, batch_norm=True)

    # Print the IR before optimization, without the metadata section.
    print(mod.astext(show_meta_data=False))

    # NOTE(review): the pass pipeline is run under opt_level=3; presumably
    # some of the passes are skipped at lower levels -- confirm.
    with tvm.transform.PassContext(opt_level=3):
        mod = fold_optimize(mod, params=params)
        # Print the IR again after the passes, for comparison.
        print(mod.astext(show_meta_data=False))