The fact that Task 2 did not find any solution did not seem to affect the AutoTVM process: it successfully generated a log file, and I was able to run inference as well (but it was slower than the GPU).
This is the name of Task 2:
Task(func_name=conv2d_nhwc.cuda, args=(('TENSOR', (1, 15, 15, 1024), 'float32'), ('TENSOR', (3, 3, 1024, 512), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nhwc.cuda', ('TENSOR', (1, 15, 15, 1024), 'float32'), ('TENSOR', (3, 3, 1024, 512), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'))
And this is the final generated log:
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_direct.cuda", [["TENSOR", [1, 418, 418, 3], "float32"], ["TENSOR", [3, 3, 3, 16], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 9569422382, "code_hash": null, "entity": [["thread_num_inverse", "ot", 64], ["thread_num_data", "ot", 32], ["thread_num_kernel", "ot", 1], ["offset_inverse", "ot", 4], ["offset_data", "ot", 4], ["offset_kernel", "ot", 2], ["inverse_in_vector", "ot", 2], ["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 2]], ["tile_x", "sp", [-1, 4, 2, 1]], ["tile_rc", "sp", [-1, 3]], ["offset_bgemm", "ot", 8], ["vector_bgemm", "ot", 1]]}, "result": [[0.00045494442771084335], 0, 1.6892707347869873, 1596808669.6243942], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_tensorcore.cuda", [["TENSOR", [1, 210, 210, 16], "float32"], ["TENSOR", [3, 3, 16, 32], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 2322597763, "code_hash": null, "entity": [["thread_num_inverse", "ot", 128], ["thread_num_data", "ot", 64], ["thread_num_kernel", "ot", 1], ["offset_inverse", "ot", 0], ["offset_data", "ot", 2], ["offset_kernel", "ot", 2], ["inverse_in_vector", "ot", 1], ["block_row_warps", "ot", 1], ["block_col_warps", "ot", 1], ["warp_row_tiles", "ot", 2], ["warp_col_tiles", "ot", 1], ["chunk", "ot", 1], ["offset", "ot", 8], ["offsetCS", "ot", 8], ["vec", "ot", 1], ["wmma_m", "ot", 32]]}, "result": [[0.00019930246134868421], 0, 2.1464030742645264, 1596808766.680427], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_tensorcore.cuda", [["TENSOR", [1, 106, 106, 32], "float32"], ["TENSOR", [3, 3, 32, 64], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 44075756, "code_hash": null, "entity": [["thread_num_inverse", "ot", 32], ["thread_num_data", "ot", 32], ["thread_num_kernel", "ot", 1], ["offset_inverse", "ot", 2], ["offset_data", "ot", 4], ["offset_kernel", "ot", 4], ["inverse_in_vector", "ot", 2], ["block_row_warps", "ot", 1], ["block_col_warps", "ot", 1], ["warp_row_tiles", "ot", 1], ["warp_col_tiles", "ot", 1], ["chunk", "ot", 2], ["offset", "ot", 8], ["offsetCS", "ot", 0], ["vec", "ot", 1], ["wmma_m", "ot", 16]]}, "result": [[0.00010162571786690976], 0, 1.9622037410736084, 1596808931.107104], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_direct.cuda", [["TENSOR", [1, 54, 54, 64], "float32"], ["TENSOR", [3, 3, 64, 128], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 11164870461, "code_hash": null, "entity": [["thread_num_inverse", "ot", 32], ["thread_num_data", "ot", 64], ["thread_num_kernel", "ot", 128], ["offset_inverse", "ot", 4], ["offset_data", "ot", 4], ["offset_kernel", "ot", 2], ["inverse_in_vector", "ot", 2], ["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 4, 13]], ["tile_x", "sp", [-1, 1, 16, 4]], ["tile_rc", "sp", [-1, 4]], ["offset_bgemm", "ot", 2], ["vector_bgemm", "ot", 2]]}, "result": [[7.327778189550426e-05], 0, 2.0689899921417236, 1596809138.3889282], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_direct.cuda", [["TENSOR", [1, 28, 28, 128], "float32"], ["TENSOR", [3, 3, 128, 256], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 844580911, "code_hash": null, "entity": [["thread_num_inverse", "ot", 32], ["thread_num_data", "ot", 64], ["thread_num_kernel", "ot", 32], ["offset_inverse", "ot", 4], ["offset_data", "ot", 4], ["offset_kernel", "ot", 1], ["inverse_in_vector", "ot", 1], ["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 13, 1, 1]], ["tile_x", "sp", [-1, 2, 16, 2]], ["tile_rc", "sp", [-1, 16]], ["offset_bgemm", "ot", 4], ["vector_bgemm", "ot", 1]]}, "result": [[9.385109207459207e-05], 0, 2.265333890914917, 1596809475.2130105], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 28, 28, 128], "float32"], ["TENSOR", [3, 3, 128, 256], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 230, "code_hash": null, "entity": [["tile_n", "ot", 8], ["tile_c", "ot", 4], ["num_thread_n", "ot", 8], ["num_thread_c", "ot", 16], ["vthread_n", "ot", 1], ["vthread_c", "ot", 2], ["step", "ot", 16]]}, "result": [[0.0007314728575757575], 0, 2.0213303565979004, 1596809609.9690342], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 15, 15, 256], "float32"], ["TENSOR", [3, 3, 256, 512], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 678, "code_hash": null, "entity": [["tile_n", "ot", 2], ["tile_c", "ot", 4], ["num_thread_n", "ot", 4], ["num_thread_c", "ot", 8], ["vthread_n", "ot", 1], ["vthread_c", "ot", 1], ["step", "ot", 32]]}, "result": [[0.0007752656313131312], 0, 1.6025142669677734, 1596809818.1569047], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_direct.cuda", [["TENSOR", [1, 15, 15, 512], "float32"], ["TENSOR", [3, 3, 512, 1024], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 7369000834, "code_hash": null, "entity": [["thread_num_inverse", "ot", 256], ["thread_num_data", "ot", 32], ["thread_num_kernel", "ot", 128], ["offset_inverse", "ot", 4], ["offset_data", "ot", 0], ["offset_kernel", "ot", 4], ["inverse_in_vector", "ot", 4], ["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 7, 7]], ["tile_x", "sp", [-1, 4, 16, 1]], ["tile_rc", "sp", [-1, 8]], ["offset_bgemm", "ot", 8], ["vector_bgemm", "ot", 4]]}, "result": [[0.00031195939096774196], 0, 2.1854963302612305, 1596810081.897409], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_direct.cuda", [["TENSOR", [1, 15, 15, 1024], "float32"], ["TENSOR", [3, 3, 1024, 512], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 92423074, "code_hash": null, "entity": [["thread_num_inverse", "ot", 256], ["thread_num_data", "ot", 256], ["thread_num_kernel", "ot", 64], ["offset_inverse", "ot", 2], ["offset_data", "ot", 1], ["offset_kernel", "ot", 4], ["inverse_in_vector", "ot", 2], ["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 7, 7, 1]], ["tile_x", "sp", [-1, 2, 4, 2]], ["tile_rc", "sp", [-1, 4]], ["offset_bgemm", "ot", 0], ["vector_bgemm", "ot", 1]]}, "result": [[0.0003508235021770682], 0, 2.213620901107788, 1596810365.3167224], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 15, 15, 1024], "float32"], ["TENSOR", [3, 3, 1024, 512], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 202, "code_hash": null, "entity": [["tile_n", "ot", 4], ["tile_c", "ot", 4], ["num_thread_n", "ot", 8], ["num_thread_c", "ot", 8], ["vthread_n", "ot", 1], ["vthread_c", "ot", 2], ["step", "ot", 16]]}, "result": [[0.003418024622222222], 0, 1.7397112846374512, 1596810564.309492], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 210, 210, 16], "float32"], ["TENSOR", [3, 3, 16, 32], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 274, "code_hash": null, "entity": [["tile_n", "ot", 4], ["tile_c", "ot", 4], ["num_thread_n", "ot", 4], ["num_thread_c", "ot", 8], ["vthread_n", "ot", 2], ["vthread_c", "ot", 2], ["step", "ot", 16]]}, "result": [[0.0006982166502890174], 0, 2.1854822635650635, 1596752251.0586443], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 106, 106, 32], "float32"], ["TENSOR", [3, 3, 32, 64], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 111, "code_hash": null, "entity": [["tile_n", "ot", 2], ["tile_c", "ot", 4], ["num_thread_n", "ot", 4], ["num_thread_c", "ot", 8], ["vthread_n", "ot", 2], ["vthread_c", "ot", 1], ["step", "ot", 16]]}, "result": [[0.0007276232560386474], 0, 1.6302690505981445, 1596752496.2037885], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 54, 54, 64], "float32"], ["TENSOR", [3, 3, 64, 128], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 301, "code_hash": null, "entity": [["tile_n", "ot", 4], ["tile_c", "ot", 4], ["num_thread_n", "ot", 4], ["num_thread_c", "ot", 16], ["vthread_n", "ot", 2], ["vthread_c", "ot", 2], ["step", "ot", 16]]}, "result": [[0.0006396164261603376], 0, 1.6311135292053223, 1596752778.9645839], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc_winograd_direct.cuda", [["TENSOR", [1, 15, 15, 256], "float32"], ["TENSOR", [3, 3, 256, 512], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 2661384769, "code_hash": null, "entity": [["thread_num_inverse", "ot", 256], ["thread_num_data", "ot", 128], ["thread_num_kernel", "ot", 1], ["offset_inverse", "ot", 0], ["offset_data", "ot", 2], ["offset_kernel", "ot", 0], ["inverse_in_vector", "ot", 4], ["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 7, 7, 1]], ["tile_x", "sp", [-1, 1, 64, 1]], ["tile_rc", "sp", [-1, 16]], ["offset_bgemm", "ot", 2], ["vector_bgemm", "ot", 2]]}, "result": [[8.728379057971015e-05], 0, 2.2499608993530273, 1596753135.8903747], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 15, 15, 512], "float32"], ["TENSOR", [3, 3, 512, 1024], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 113, "code_hash": null, "entity": [["tile_n", "ot", 8], ["tile_c", "ot", 4], ["num_thread_n", "ot", 4], ["num_thread_c", "ot", 8], ["vthread_n", "ot", 2], ["vthread_c", "ot", 1], ["step", "ot", 16]]}, "result": [[0.003101637875], 0, 3.969357490539551, 1596753684.8480952], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nhwc.cuda", [["TENSOR", [1, 13, 13, 512], "float32"], ["TENSOR", [1, 1, 512, 425], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 760, "code_hash": null, "entity": [["tile_n", "ot", 4], ["tile_c", "ot", 4], ["num_thread_n", "ot", 4], ["num_thread_c", "ot", 8], ["vthread_n", "ot", 2], ["vthread_c", "ot", 1], ["step", "ot", 32]]}, "result": [[0.00023400437094281296], 0, 1.702641248703003, 1596754126.4415483], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 143261, "code_hash": null, "entity": [["tile_f", "sp", [-1, 2, 32, 1]], ["tile_y", "sp", [-1, 1, 1, 7]], ["tile_x", "sp", [-1, 1, 7, 1]], ["tile_rc", "sp", [-1, 16]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}, "result": [[3.086108868265017e-05], 0, 2.1766304969787598, 1596638736.6176426], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 1116793, "code_hash": null, "entity": [["tile_f", "sp", [-1, 4, 16, 2]], ["tile_y", "sp", [-1, 1, 1, 1]], ["tile_x", "sp", [-1, 2, 7, 1]], ["tile_rc", "sp", [-1, 4]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}, "result": [[2.1558027118334548e-05], 0, 2.02054500579834, 1596638875.932826], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 1099263, "code_hash": null, "entity": [["tile_f", "sp", [-1, 1, 64, 2]], ["tile_y", "sp", [-1, 1, 1, 1]], ["tile_x", "sp", [-1, 1, 7, 4]], ["tile_rc", "sp", [-1, 32]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}, "result": [[1.8940856249498918e-05], 0, 2.005221366882324, 1596639012.0224953], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "float32"], {}], "config": {"index": 20053817, "code_hash": null, "entity": [["tile_f", "sp", [-1, 1, 2, 16]], ["tile_y", "sp", [-1, 2, 4, 1]], ["tile_x", "sp", [-1, 1, 14, 1]], ["tile_rc", "sp", [-1, 4]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}, "result": [[1.3704667724308127e-05], 0, 2.1790435314178467, 1596639247.948683], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3, 3, 3], [1, 1], "float32"], {}], "config": {"index": 63835487, "code_hash": null, "entity": [["tile_f", "sp", [-1, 2, 16, 1]], ["tile_y", "sp", [-1, 4, 7, 1]], ["tile_x", "sp", [-1, 1, 1, 8]], ["tile_rc", "sp", [-1, 1]], ["tile_ry", "sp", [-1, 7]], ["tile_rx", "sp", [-1, 7]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}, "result": [[8.191113981042654e-05], 0, 2.6882503032684326, 1596639396.3446727], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw_winograd.cuda", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 97438, "code_hash": null, "entity": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 2, 32]], ["tile_x", "sp", [-1, 1, 49, 4]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 0]]}, "result": [[3.224812635135135e-05], 0, 4.217549562454224, 1596639608.6664226], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 57167252, "code_hash": null, "entity": [["tile_f", "sp", [-1, 2, 8, 2]], ["tile_y", "sp", [-1, 4, 1, 1]], ["tile_x", "sp", [-1, 1, 28, 1]], ["tile_rc", "sp", [-1, 2]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}, "result": [[6.48782944519621e-05], 0, 2.0824203491210938, 1596639986.995053], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 25992895, "code_hash": null, "entity": [["tile_f", "sp", [-1, 2, 8, 2]], ["tile_y", "sp", [-1, 2, 2, 1]], ["tile_x", "sp", [-1, 1, 14, 1]], ["tile_rc", "sp", [-1, 4]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}, "result": [[6.261991542678152e-05], 0, 2.3594276905059814, 1596640424.8481562], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw_winograd.cuda", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 125218, "code_hash": null, "entity": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 16, 2]], ["tile_x", "sp", [-1, 7, 2, 2]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 0]]}, "result": [[3.978000149055979e-05], 0, 4.92205023765564, 1596640735.21119], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 29448294, "code_hash": null, "entity": [["tile_f", "sp", [-1, 1, 8, 2]], ["tile_y", "sp", [-1, 4, 1, 1]], ["tile_x", "sp", [-1, 1, 14, 1]], ["tile_rc", "sp", [-1, 2]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}, "result": [[7.648591985951469e-05], 0, 2.093278408050537, 1596641115.4107127], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 7875316, "code_hash": null, "entity": [["tile_f", "sp", [-1, 2, 16, 1]], ["tile_y", "sp", [-1, 2, 1, 1]], ["tile_x", "sp", [-1, 2, 7, 1]], ["tile_rc", "sp", [-1, 4]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}, "result": [[7.978286595531844e-05], 0, 2.290926694869995, 1596641311.853063], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw_winograd.cuda", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 81723, "code_hash": null, "entity": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 8, 1, 2]], ["tile_x", "sp", [-1, 1, 49, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}, "result": [[5.0846598899936535e-05], 0, 2.4042413234710693, 1596641706.4003792], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 8799805, "code_hash": null, "entity": [["tile_f", "sp", [-1, 2, 8, 1]], ["tile_y", "sp", [-1, 1, 2, 1]], ["tile_x", "sp", [-1, 7, 2, 1]], ["tile_rc", "sp", [-1, 2]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}, "result": [[0.00011983135475834579], 0, 2.174240827560425, 1596641921.8496149], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 608554, "code_hash": null, "entity": [["tile_f", "sp", [-1, 1, 16, 1]], ["tile_y", "sp", [-1, 1, 7, 1]], ["tile_x", "sp", [-1, 1, 1, 7]], ["tile_rc", "sp", [-1, 2]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}, "result": [[0.00017544716376811595], 0, 2.2342605590820312, 1596642145.242544], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw_winograd.cuda", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 414729, "code_hash": null, "entity": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 8, 1]], ["tile_x", "sp", [-1, 1, 4, 4]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}, "result": [[5.793594312796208e-05], 0, 2.0294864177703857, 1596642299.7055194], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32", "conv2d_nchw.cuda", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "float32"], {}], "config": {"index": 818654, "code_hash": null, "entity": [["tile_f", "sp", [-1, 1, 16, 1]], ["tile_y", "sp", [-1, 7, 1, 1]], ["tile_x", "sp", [-1, 1, 7, 1]], ["tile_rc", "sp", [-1, 4]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}, "result": [[0.00017657052846715328], 0, 2.1720330715179443, 1596642561.9635131], "version": 0.2, "tvm_version": "0.7.dev1"}