Hi,
I just converted a MobileNet model (608x608 input) to TVM.
It takes about 0.2 seconds per frame, i.e. roughly 5 FPS, on a GTX 1050 Ti with the GPU almost fully utilized.
Time per frame:
0.20082473754882812
0.19168806076049805
0.19301962852478027
0.19402718544006348
0.1933760643005371
0.1952970027923584
0.2037806510925293
0.1919691562652588
0.20432472229003906
0.20802545547485352
0.20062041282653809
nvidia-smi -i 0 --query-gpu=index,timestamp,utilization.gpu,power.draw,temperature.gpu --format=csv -l 1
0, 2019/04/22 15:39:58.050, 90 %, [Not Supported], 56
0, 2019/04/22 15:39:59.050, 88 %, [Not Supported], 56
0, 2019/04/22 15:40:00.051, 90 %, [Not Supported], 56
0, 2019/04/22 15:40:01.051, 88 %, [Not Supported], 56
0, 2019/04/22 15:40:02.051, 88 %, [Not Supported], 57
0, 2019/04/22 15:40:03.052, 96 %, [Not Supported], 57
0, 2019/04/22 15:40:04.052, 100 %, [Not Supported], 57
0, 2019/04/22 15:40:05.053, 91 %, [Not Supported], 57
0, 2019/04/22 15:40:06.053, 89 %, [Not Supported], 58
0, 2019/04/22 15:40:07.053, 88 %, [Not Supported], 58
Am I doing something wrong, or is this the performance currently expected?
Code:
def display(img, out, thresh=0.5):
    """Draw every detection scoring at least ``thresh`` on ``img`` and show it.

    Args:
        img: Image array that is drawn on in place. ``img.shape[1]`` and
            ``img.shape[0]`` are used as the width and height when scaling
            box coordinates.
        out: Iterable of detection rows, each read as
            ``[class_id, score, xmin, ymin, xmax, ymax]``. The coordinates
            are multiplied by the image size, so they are presumably
            normalised to [0, 1] -- confirm against the model output.
        thresh: Minimum score a detection needs in order to be drawn.
    """
    # [width, height, width, height], lined up with xmin, ymin, xmax, ymax.
    # Depends only on the image, so it is computed once rather than per row.
    scales = [img.shape[1], img.shape[0]] * 2

    for det in out:
        cid = int(det[0])
        # Rows with a negative class id are skipped (presumably padding
        # entries in the detector output -- TODO confirm).
        if cid < 0:
            continue
        if det[1] < thresh:
            continue

        xmin, ymin, xmax, ymax = (
            int(p * s) for p, s in zip(det[2:6].tolist(), scales)
        )
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        cv2.putText(img, class_names[cid], (xmin, ymin), font, 1,
                    (200, 0, 0), 3, cv2.LINE_AA)

    # Shown once per call, after all boxes are drawn, so the window is
    # refreshed even when no detection passes the threshold.
    cv2.imshow('frame', img)
# Read frames from the RTSP camera, run each one through the compiled TVM
# module and display the detections until the stream ends or 'q' is pressed.
cap = cv2.VideoCapture("rtsp://admin:admin123@192.168.1.193:554/Streaming/Channels/101")
try:
    while cap.isOpened():
        t0 = time.time()
        ret, image = cap.read()
        if not ret:
            # No frame was returned (stream ended or dropped); stop instead
            # of handing an invalid image to cv2.resize.
            break

        # NOTE(review): cv2.resize takes dsize as (width, height). The
        # transpose below produces a channels-first layout, so if data_shape
        # is (N, C, H, W) this passes (H, W). Harmless for a square input
        # such as 608x608 -- confirm before using a non-square shape.
        img_data = cv2.resize(image, (data_shape[2], data_shape[3]))
        # Reverse the channel order of the captured frame and convert to
        # float32.
        img_data = img_data[:, :, (2, 1, 0)].astype(np.float32)
        # Mean subtraction is disabled:
        # img_data -= np.array([123, 117, 104])
        # HWC -> CHW, then add a leading batch axis.
        img_data = np.transpose(img_data, (2, 0, 1))
        img_data = np.expand_dims(img_data, axis=0)

        module.run(data=img_data)
        tvm_output = module.get_output(0)
        display(image, tvm_output.asnumpy()[0], thresh=0.25)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # The printed time spans frame capture, preprocessing, inference,
        # drawing and the waitKey call -- not inference alone.
        print(time.time() - t0)
finally:
    # Release the capture and close the window even if an iteration raised.
    cap.release()
    cv2.destroyAllWindows()