Hi,
I have been following the howto_deploy
example to run inference with c++.
My model is an ONNX model, and I have built it into a shared-object (.so)
file:
def compile_model():
    """Compile the ONNX model with Relay and serialize the three deploy artifacts.

    Loads the model from ``mdpth``, converts it with the ONNX frontend,
    builds it for ``target`` at ``opt_level`` (applying the autotvm tuning
    log), and writes under ``tmp/``:
      * ``{out_name}_graph.json``  -- the runtime graph definition
      * ``{out_name}_param.params`` -- the serialized parameter blob
      * ``lib{out_name}.so``       -- the compiled operator library

    Relies on module-level globals: mdpth, shape_dict, tune_log_file,
    opt_level, target, out_name.
    """
    model = onnx.load(mdpth)
    mod, params = relay.frontend.from_onnx(
        model, shape_dict, dtype='float32', opset=11)
    func = mod['main']
    with autotvm.apply_history_best(tune_log_file), relay.build_config(opt_level=opt_level):
        # with relay.build_config(opt_level=opt_level):
        graph, lib, params = relay.build(func, target, params=params)
    with open('tmp/{}_graph.json'.format(out_name), 'w') as fw:
        fw.write(graph)
    with open('tmp/{}_param.params'.format(out_name), 'wb') as fw:
        fw.write(relay.save_param_dict(params))
    # BUG FIX: exporting with a '.tar' suffix produces a tar archive of
    # object files, which tvm::runtime::Module::LoadFromFile cannot dlopen
    # on the C++ side. Exporting with a '.so' suffix makes export_library
    # invoke the host compiler and emit a loadable shared object.
    lib.export_library('tmp/lib{}.so'.format(out_name))
The problem is that my model has only one input tensor (an image) but 14 output tensors.
I imitate the example c++ code to run inference:
int main() {
tvm::runtime::Module mod_dylib =
tvm::runtime::Module::LoadFromFile("../lib/libssd512.tar.so");
tvm::runtime::PackedFunc f = mod_dylib.GetFunction("main");
if (f == nullptr) cout << "load func from so file error\n";
int dtype_code = kDLFloat;
int dtype_bits = 32;
int dtype_lanes = 1;
int device_type = kDLGPU;
int device_id = 4;
int64_t in_shape[4] = {1, 3, 512, 512};
int in_ndim = 4;
DLTensor *inten;
TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes,
device_type, device_id, &inten);
int64_t out_shapes[14][4] = {
{1, 324, 64, 64}, {1, 486, 32, 32}, {1, 486, 16, 16}, {1, 486, 8, 8},
{1, 486, 4, 4}, {1, 324, 2, 2}, {1, 324, 1, 1}, {1, 16, 64, 64},
{1, 24, 32, 32}, {1, 24, 16, 16}, {1, 24, 8, 8}, {1, 24, 4, 4},
{1, 16, 2, 2}, {1, 16, 1, 1}};
int out_ndim = 4;
vector<DLTensor*> outs(14);
for (int i{0}; i < 14; ++i) {
TVMArrayAlloc(out_shapes[i], out_ndim, dtype_code, dtype_bits,
dtype_lanes, device_type, device_id, &outs[i]);
}
f(inten, outs[0], outs[1], outs[2], outs[3], outs[4], outs[5], outs[6], outs[7], outs[8], outs[9], outs[10], outs[11], outs[12], outs[13]);
TVMArrayFree(inten);
for (int i{0}; i < 14; ++i) {
TVMArrayFree(outs[i]);
}
while(true) {}
return 0;
}
Then I got the error of:
load func from so file error
terminate called after throwing an instance of 'std::bad_function_call'
  what(): bad_function_call
Aborted (core dumped)
How can I run inference in C++ on a serialized ONNX model, please?