I don't see any performance difference between the 'cpu', 'vulkan', and 'opencl' modes, and I don't know what's wrong. Any help would be appreciated, thanks.


#1

I followed the tutorial at https://docs.tvm.ai/tutorials/frontend/deploy_model_on_android.html?highlight=android;
everything built fine, but when I tested the code from the tutorial, I got the results shown below,
with this config.mk:

# ABI to build the native code for
APP_ABI = arm64-v8a

# Android platform (API level) to build against
APP_PLATFORM = android-24

# whether to enable OpenCL during compile
USE_OPENCL = 1

# whether to enable Vulkan during compile
USE_VULKAN = 1

ifeq ($(USE_VULKAN), 1)
  # Statically linking vulkan requires API Level 24 or higher
  # (APP_PLATFORM is already android-24 above, so this does not change the value here)
  APP_PLATFORM = android-24
endif

# the additional include headers you want to add, e.g., SDK_PATH/adrenosdk/Development/Inc
ADD_C_INCLUDES += /opt/adrenosdk-linux-5_0/Development/Inc
# download from https://github.com/KhronosGroup/OpenCL-Headers
ADD_C_INCLUDES += /workspace/3rdparty/OpenCL-Headers/

# the additional link libs you want to add, e.g., ANDROID_LIB_PATH/libOpenCL.so
ADD_LDLIBS =libOpenCL.so

I downloaded adrenosdk-linux-5_0 and OpenCL-Headers, and copied libOpenCL.so. The results were:
cpu: 10.03(0.24)
opencl: 10.15(0.24)
vulkan: 10.2(0.2)

The test code is as follows (all copied from the tutorial):

import os
import numpy as np
from PIL import Image
import keras
from keras.applications.mobilenet_v2 import MobileNetV2    
import tvm
import tvm.relay as relay
from tvm import rpc
from tvm.contrib import util, ndk, graph_runtime as runtime
from tvm.contrib.download import download_testdata

# Build a Keras MobileNetV2 (alpha=0.5, 224x224x3 input, 1000 classes) with no
# built-in weights, then load the weights file fetched by download_testdata.
keras.backend.clear_session()  # Destroys the current TF graph and creates a new one.
weights_url = ''.join(['https://github.com/JonathanCMitchell/',
                   'mobilenet_v2_keras/releases/download/v1.1/',
                   'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_0.5_224.h5'])    
weights_file = 'mobilenet_v2_weights.h5'
weights_path = download_testdata(weights_url, weights_file, module='keras')
keras_mobilenet_v2 = MobileNetV2(alpha=0.5, include_top=True, weights=None,
                            input_shape=(224, 224, 3), classes=1000)
keras_mobilenet_v2.load_weights(weights_path)

# Fetch the test picture and resize it to the 224x224 size the model was
# constructed with above.
img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
img_name = 'cat.png'
img_path = download_testdata(img_url, img_name, module='data')
image = Image.open(img_path).resize((224, 224))
# dtype the preprocessed input is cast to before it is handed to the runtime module
dtype = 'float32'

def transform_image(image):
    """Normalize an image per channel and return it as a batch of one.

    The input is converted with ``np.array``; a three-element mean is
    subtracted and the result divided by a three-element scale, both
    broadcast over the last axis. The axes are then reordered with
    ``transpose((2, 0, 1))`` (last axis moved to the front) and a leading
    axis of length 1 is added.

    Assumes ``image`` is a rank-3, channels-last array-like with three
    channels -- TODO confirm for images that carry an alpha channel.
    """
    channel_mean = np.array([123., 117., 104.])
    channel_scale = np.array([58.395, 57.12, 57.375])

    normalized = (np.array(image) - channel_mean) / channel_scale
    channels_first = normalized.transpose((2, 0, 1))
    return channels_first[np.newaxis, :]

# Preprocess the resized image into the array that is fed to the model.
x = transform_image(image)


# Download the lookup table that is indexed with the predicted class id
# further down to print a human-readable label.
synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
                  '4d0b62f3d01426887599d4f7ede23ee5/raw/',
                  '596b27d23537e5a1b5751d2b0481ef172f58b539/',
                  'imagenet1000_clsid_to_human.txt'])
synset_name = 'imagenet1000_clsid_to_human.txt'
synset_path = download_testdata(synset_url, synset_name, module='data')
with open(synset_path) as f:
    # NOTE(review): eval() executes whatever the downloaded file contains.
    # If the file is a plain literal, ast.literal_eval would parse it without
    # running arbitrary code -- confirm the file format before switching.
    synset = eval(f.read())

# NOTE: while local_demo is True, the `if local_demo:` branch below forces
# target = 'llvm' and the `elif` branches for 'opencl' / 'vulkan' are never
# reached, so `test_target` has no effect and every run is built for the
# plain 'llvm' target. Set local_demo = False to build for the selected
# Android target.
local_demo = True

# by default on CPU target will execute.
# select 'cpu', 'opencl' and 'vulkan'
test_target = 'cpu'#'vulkan'#'opencl'#

# Change target configuration.
# Run `adb shell cat /proc/cpuinfo` to find the arch.
arch = 'arm64'
target = 'llvm -target=%s-linux-android' % arch
target_host = None

if local_demo:
    # Local run: plain 'llvm' target, no separate host target.
    target_host = None
    target = 'llvm'
elif test_target == 'opencl':
    # The Android llvm target becomes the host target; kernels target 'opencl'.
    target_host = target
    target = 'opencl'
elif test_target == 'vulkan':
    # The Android llvm target becomes the host target; kernels target 'vulkan'.
    target_host = target
    target = 'vulkan'
# Any other test_target (e.g. 'cpu') keeps the Android llvm target set above
# with target_host = None.

# Name of the model input; used as the key of shape_dict here and again when
# the input data is set on the runtime module.
input_name = 'input_1'
shape_dict = {input_name: x.shape}
# Convert the Keras model into a Relay function plus its parameters.
func, params = relay.frontend.from_keras(keras_mobilenet_v2, shape_dict)

# Compile with the target / target_host chosen above.
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(func, target=target,
                                 target_host=target_host, params=params)

# After `relay.build`, you will get three return values: graph,
# library and the new parameter, since we do some optimization that will
# change the parameters but keep the result of model as the same.

# Save the library at local temporary directory.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.so')
# ndk.create_shared is passed as the compile function only when not running
# the local demo; otherwise None is passed to export_library.
fcompile = ndk.create_shared if not local_demo else None
lib.export_library(lib_fname, fcompile)



# Tracker address comes from the environment, falling back to 0.0.0.0:9190.
tracker_host = os.environ.get('TVM_TRACKER_HOST', '0.0.0.0')
tracker_port = int(os.environ.get('TVM_TRACKER_PORT', 9190))
# Key passed to tracker.request() below when asking for a remote session.
key = 'android'

if local_demo:
    # Local demo: use a local RPC session instead of contacting the tracker.
    remote = rpc.LocalSession()
else:
    tracker = rpc.connect_tracker(tracker_host, tracker_port)
    # When running a heavy model, we should increase the `session_timeout`
    remote = tracker.request(key, priority=0,
                         session_timeout=60)

# Pick the context to run on. With local_demo set, the cpu context is always
# used and test_target is not consulted.
if local_demo:
    ctx = remote.cpu(0)
elif test_target == 'opencl':
    ctx = remote.cl(0)
elif test_target == 'vulkan':
    ctx = remote.vulkan(0)
else:
    ctx = remote.cpu(0)

# upload the library to remote device and load it
remote.upload(lib_fname)
rlib = remote.load_module('net.so')

# create the remote runtime module
module = runtime.create(graph, rlib, ctx)


# set parameter (upload params to the remote device. This may take a while)
module.set_input(**params)
# set input data (cast to `dtype` first)
module.set_input(input_name, tvm.nd.array(x.astype(dtype)))
# run
module.run()
# get output
out = module.get_output(0)

# get top1 result: index of the largest output value, looked up in synset
top1 = np.argmax(out.asnumpy())
print('TVM prediction top-1: {}'.format(synset[top1]))

# Time the module's 'run' function on ctx (number=1, repeat=10) and report
# the mean and standard deviation of the collected results.
print('Evaluate inference time cost...')
ftimer = module.module.time_evaluator('run', ctx, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
print('Mean inference time (std dev): %.2f ms (%.2f ms)' % (np.mean(prof_res),
                                                        np.std(prof_res)))

I also tried the default config.mk:

# ABIs to build the native code for
APP_ABI = all

# Android platform (API level) to build against
APP_PLATFORM = android-24

# whether to enable OpenCL during compile
USE_OPENCL = 0

# whether to enable Vulkan during compile
USE_VULKAN = 0

ifeq ($(USE_VULKAN), 1)
  # Statically linking vulkan requires API Level 24 or higher
  # (not taken here because USE_VULKAN is 0)
  APP_PLATFORM = android-24
endif

# the additional include headers you want to add, e.g., SDK_PATH/adrenosdk/Development/Inc
ADD_C_INCLUDES =

# the additional link libs you want to add, e.g., ANDROID_LIB_PATH/libOpenCL.so
ADD_LDLIBS =

The results were similar.

Also, whenever I ran the test code, I always got a warning like:
StreamExecutor device (0): <undefined>, <undefined>

but I don't know what it means.
My phone is a 坚果Pro2特别版 (Smartisan OS, Android 7.1.1, Qualcomm Technologies, Inc SDM636).
Any help will be greatly appreciated.


#2

I ran into a similar problem, but I don't know how to solve it.


#3

hey, guys, did you solve your problems?


#4

You need to set `local_demo = False`.
With that, I got cpu: 176 ms
opencl: not working
vulkan: works, but with lots of errors: 990.92 ms (4.18 ms)

In fact, I don't know what the difference between the three is.


#5

You need to set `local_demo = False`.
With that, I got cpu: 176 ms
opencl: not working
vulkan: works, but with lots of errors: 990.92 ms (4.18 ms)

In fact, I don't know what the difference between the three is.