About run time on Raspberry Pi 3B+, now about result accuracy

When I run MobileNet v1.0 on the Raspberry Pi 3B+ via RPC, the inference time is 120 ms. However, when I use the cross compiler to build a .so and run it on the Pi directly, the inference time is 280 ms. What happened?
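
For reference, a more stable way to compare the two setups is TVM's time_evaluator, which averages over several runs and excludes one-off setup cost. A minimal sketch, assuming a graph_runtime module m and a context ctx as in the script further down:

# Hedged sketch: average the inference time over several runs instead of a
# single wall-clock measurement. Assumes `m` is the graph_runtime module and
# `ctx` the (local or remote) context, as in the script below.
ftimer = m.module.time_evaluator("run", ctx, number=10, repeat=3)
prof_res = np.array(ftimer().results) * 1000  # seconds -> milliseconds
print("mean inference time: %.2f ms (std %.2f ms)" % (prof_res.mean(), prof_res.std()))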

It was my mistake; the timing is the same between RPC and local.

OK, now I have found an unexpected difference between the detection results when running locally on the Pi and when running over RPC.

Here is the code:

import os
import zipfile
import tvm
import mxnet as mx
import cv2, time
import numpy as np
import argparse
from nnvm import compiler
from nnvm.frontend import from_mxnet
from tvm import relay
from tvm.contrib.download import download
from tvm.contrib import graph_runtime, util
from mxnet.model import load_checkpoint
from tvm import rpc


test_image = "1.jpg"
dshape = (1, 3, 240, 320)
dtype = "float32"

local_demo = False
if local_demo:
    target = "llvm"
else:
    target = tvm.target.arm_cpu('rasp3b')

sym, arg_params, aux_params = load_checkpoint('facedetect', 0)

parser = argparse.ArgumentParser()

parser.add_argument("-f", "--frontend", help="Frontend for compilation, nnvm or relay",
                    type=str, default="relay")

args = parser.parse_args()

if args.frontend == "relay":
    net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params, aux_params=aux_params)
    with relay.build_config(opt_level=2):
        graph, lib, params = relay.build(net, target, params=params)
elif args.frontend == "nnvm":
    net, params = from_mxnet(sym, arg_params, aux_params)
    with compiler.build_config(opt_level=3):
        graph, lib, params = compiler.build(net, target, {"data": dshape}, params=params)
else:
    parser.print_help()
    parser.exit()


# cross-compile the shared library for the Pi using the ARM toolchain
lib.export_library('model/facedetect.so', tvm.contrib.cc.create_shared, cc='/home/wanghao/program/arm_new/bin/arm-linux-gnueabihf-g++')

with open('model/facedetect.json', "w") as fo:
    fo.write(graph)
# the params blob is written with relay.save_param_dict; with the nnvm
# frontend, nnvm.compiler.save_param_dict is the matching call
with open('model/facedetect.params', "wb") as fo:
    fo.write(relay.save_param_dict(params))

def process_image(image):
    rect = cv2.resize(image, (dshape[3], dshape[2]))
    rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB).astype(np.float32)
    rect -= np.array([104, 123, 117])
    rect = rect.transpose((2, 0, 1))
    img_data = np.expand_dims(rect, axis=0)
    return img_data

def draw_box(img, out, thresh=0.5):

    for det in out:
        cid = int(det[0])
        if cid >= 0:
            score = det[1]
            print(score)
            if score > thresh:
                scales = [img.shape[1], img.shape[0]] * 2
                x1, y1, x2, y2 = (np.array(det[2:6]) * scales).astype(int)
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255))
                text = class_names[cid]
                cv2.putText(img, '{:s} {:.3f}'.format(text, score), (x1, y1 - 2), cv2.FONT_ITALIC, 1, (0, 0, 255))

if local_demo:
    ctx = tvm.cpu()
    m = graph_runtime.create(graph, lib, ctx)
else:
    # connect to the RPC server running on the Pi, upload the library and load it remotely
    host = '192.168.1.114'
    port = 9090
    remote = rpc.connect(host, port)
    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.tar')
    lib.export_library(lib_fname)
    remote.upload(lib_fname)
    rlib = remote.load_module('net.tar')

    ctx = remote.cpu()
    m = graph_runtime.create(graph, rlib, ctx)

m.set_input(**params)

class_names = ["face",]
cap = cv2.VideoCapture(0)
while True:

    ret, frame = cap.read()
    img_data = process_image(frame)
    m.set_input('data', tvm.nd.array(img_data.astype(dtype)))

    t1 = time.time()
    m.run()
    print('fps:  ', 1 / (time.time() - t1))
    tvm_output = m.get_output(0).asnumpy()[0]

    draw_box(frame, tvm_output, 0.8)
    cv2.imshow('hello', frame)
    if cv2.waitKey(1) == 27:
        break
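
For completeness, the script running locally on the Pi presumably loads the exported artifacts along these lines (a hedged sketch, since the actual on-device script is not shown in this thread; the file paths are only placeholders):

# Hedged sketch of the on-device loading path (the real local script may differ;
# the paths below are placeholders).
import tvm
from tvm.contrib import graph_runtime

loaded_lib = tvm.module.load('facedetect.so')
loaded_graph = open('facedetect.json').read()
loaded_params = bytearray(open('facedetect.params', 'rb').read())

ctx = tvm.cpu()
m = graph_runtime.create(loaded_graph, loaded_lib, ctx)
m.load_params(loaded_params)  # load the serialized parameter blob directly
# then m.set_input('data', ...), m.run(), m.get_output(0) as in the host script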

Here is the result running on the Pi locally,

and here is the result over RPC,

Yes, when running locally on the Pi the detected face probability is about 0.2, but you can see that over RPC the face probability is 0.9. Maybe the problem is tvm.contrib.cc.create_shared; I don't know.
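
One way to narrow this down is to feed the exact same saved input array through both paths and compare the raw network outputs numerically; if they match, the difference comes from preprocessing or the camera rather than from the exported library. A rough sketch, where the file names are only placeholders:

# Hedged sketch: run the exact same preprocessed frame through both setups and
# compare the raw outputs. 'input.npy', 'out_local.npy' and 'out_rpc.npy' are
# placeholder file names, not files from the original post.
import numpy as np

img_data = np.load('input.npy')  # one frame saved from process_image()
m.set_input('data', tvm.nd.array(img_data.astype('float32')))
m.run()
np.save('out_rpc.npy', m.get_output(0).asnumpy())  # save 'out_local.npy' in the local run

# afterwards, compare the two saved outputs on the host:
out_local = np.load('out_local.npy')
out_rpc = np.load('out_rpc.npy')
np.testing.assert_allclose(out_local, out_rpc, rtol=1e-3, atol=1e-5)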

Did you use Docker to do it?