Hi @kevinthesun,
Everything runs OK, except that the output is wrong.
cv::Mat tensor = cv::dnn::blobFromImage(inputImageAligned,1.0,cv::Size(256,256),cv::Scalar(0,0,0),true);
constexpr int device_type = kDLCPU;
constexpr int device_id = 0;
constexpr int in_ndim = 4;
//const int64_t in_shape[in_ndim] = {1, 3, 256, 256};
const int64_t in_shape[in_ndim] = {1, 256, 256, 3}; (I did the autotuning using the NHWC layout, input_shape = (1, 256, 256, 3). Or was in_shape not set correctly for deployment?)
TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &input);
TVMArrayCopyFromBytes(input,tensor.data,256*3*256*4);
When doing the autotuning, I got the correct output as follows:
tvm_output = module.get_output(0, tvm.nd.empty(((65536,2)), 'float32'))
tvm_output_to_numpy = tvm_output.asnumpy()
mask_1 = tvm_output_to_numpy[:,1].reshape(256,256)
mask_2 = tvm_output_to_numpy[:,0].reshape(256,256)
Could I get the output in the right format in the following way for deployment in C++?
tvm::runtime::PackedFunc get_output = mod->GetFunction("get_output");
tvm::runtime::NDArray res = get_output(0);
cv::Mat vector(65536,2,CV_32F);
memcpy(vector.data,res->data,65536*4*2);
cv::Mat mask = vector.reshape(2, 256).clone();
Thanks a lot!