Hi, thanks a lot for your prompt response. I have modified the description above to be more specific about the tutorials I am running.
Please find the optimal configuration for “llvm” (ResNet-18) below. I have enumerated the layers for convenience. One difference, for instance, is that for “llvm” there are 3 schedules corresponding to 3 convolutions of size [1, 512, 7, 7], whereas in the CUDA log, as shown in my reply below this one, there is only 1 schedule for 1 convolution of size [1, 512, 7, 7].
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3, 3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3, 3, 3], [1, 1], "NCHW", "float32"], {"i": 136, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 1]], ["unroll_kw", "ot", false]]}], "r": [[0.01014757672815534], 0, 2.5155513286590576, 1581436858.4419928], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 82, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009950462727272727], 0, 2.7125132083892822, 1581437816.7206779], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 82, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009950462727272727], 0, 2.7125132083892822, 1581437816.7206779], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 1, 1, "float32"], [1, 1], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {"i": 425, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 1]], ["tile_oh", "ot", 2]]}], "r": [[0.0011312621484992102], 0, 3.207817316055298, 1581441951.768949], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 82, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009950462727272727], 0, 2.7125132083892822, 1581437816.7206779], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 82, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009950462727272727], 0, 2.7125132083892822, 1581437816.7206779], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 89, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.005048907204472844], 0, 4.021136522293091, 1581443233.6869035], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 101, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009989094614814816], 0, 3.1271796226501465, 1581446749.781529], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {"i": 368, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 1]], ["tile_oh", "ot", 2]]}], "r": [[0.0005921782860057119], 0, 3.293405532836914, 1581446003.794353], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 101, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009989094614814816], 0, 3.1271796226501465, 1581446749.781529], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 101, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.009989094614814816], 0, 3.1271796226501465, 1581446749.781529], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 109, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.005018088178321678], 0, 3.9786384105682373, 1581449992.5711432], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 188, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.009956693457142857], 0, 3.0830090045928955, 1581453318.564188], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {"i": 324, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 1]], ["tile_oh", "ot", 2]]}], "r": [[0.0005877056827348746], 0, 3.120971918106079, 1581451718.8096752], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 188, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.009956693457142857], 0, 3.0830090045928955, 1581453318.564188], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 188, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.009956693457142857], 0, 3.0830090045928955, 1581453318.564188], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 305, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", false]]}], "r": [[0.005143994603278689], 0, 4.259227514266968, 1581454611.755201], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 128, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.009949430639639639], 0, 2.7014334201812744, 1581456113.8993032], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0, 0, 0], [1, 1], "NCHW", "float32"], {"i": 223, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 1]], ["tile_oh", "ot", 2]]}], "r": [[0.0005641878775181305], 0, 3.1547939777374268, 1581454752.9796696], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 128, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.009949430639639639], 0, 2.7014334201812744, 1581456113.8993032], "v": 0.1}
-
{"i": ["llvm", "topi_x86_conv2d_NCHWc", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1, 1, 1], [1, 1], "NCHW", "float32"], {"i": 128, "c": null, "t": "direct", "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.009949430639639639], 0, 2.7014334201812744, 1581456113.8993032], "v": 0.1}