Hi,
I am getting errors using AutoTVM.
- I am using CUDA 10 and LLVM 7.
- I pass “-ccbin /usr/bin/cuda-gcc” as an option via tvm/contrib/nvcc.py, to make sure GCC version 7.3 (CUDA-compatible) is used as the host compiler.
I spent some time debugging the AutoTVM process, and all of the generated C code compiles fine into .ptx.
But I have no idea why the kernels are rejected as invalid.
$ wget https://raw.githubusercontent.com/dmlc/tvm/master/tutorials/autotvm/tune_conv2d_cuda.py
$ python3 tune_conv2d_cuda.py
ConfigSpace (len=10454400, space_map=
0 tile_f: Split(policy=all, product=512, num_outputs=4) len=220
1 tile_y: Split(policy=all, product=7, num_outputs=4) len=4
2 tile_x: Split(policy=all, product=7, num_outputs=4) len=4
3 tile_rc: Split(policy=all, product=512, num_outputs=3) len=55
4 tile_ry: Split(policy=all, product=3, num_outputs=3) len=3
5 tile_rx: Split(policy=all, product=3, num_outputs=3) len=3
6 auto_unroll_max_step: OtherOption([0, 512, 1500]) len=3
7 unroll_explicit: OtherOption([0, 1]) len=2
)
Get devices for measurement successfully!
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
No: 1 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(’’,), error_no=7, all_cost=200, timestamp=1546003014.4647906) [(‘tile_f’, [128, 4, 1, 1]), (‘tile_y’, [7, 1, 1, 1]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [8, 16, 4]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [1, 1, 3]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6665122
No: 2 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.020939111709594727, timestamp=1546003013.933304) [(‘tile_f’, [2, 16, 16, 1]), (‘tile_y’, [1, 1, 7, 1]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [16, 4, 8]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 1)],None,7461118
No: 3 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.0171053409576416, timestamp=1546003013.9334168) [(‘tile_f’, [2, 8, 16, 2]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [1, 4, 128]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [1, 1, 3]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6957588
No: 4 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.023586273193359375, timestamp=1546003013.933508) [(‘tile_f’, [128, 4, 1, 1]), (‘tile_y’, [1, 1, 7, 1]), (‘tile_x’, [1, 7, 1, 1]), (‘tile_rc’, [2, 1, 256]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 0)],None,377962
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
No: 5 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.02170276641845703, timestamp=1546003014.5508878) [(‘tile_f’, [4, 8, 8, 2]), (‘tile_y’, [7, 1, 1, 1]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [2, 256, 1]), (‘tile_ry’, [3, 1, 1]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 1)],None,7580402
No: 6 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(’’,), error_no=7, all_cost=200, timestamp=1546003015.4716434) [(‘tile_f’, [32, 1, 4, 4]), (‘tile_y’, [7, 1, 1, 1]), (‘tile_x’, [7, 1, 1, 1]), (‘tile_rc’, [64, 8, 1]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6205875
No: 7 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.014199495315551758, timestamp=1546003014.8930523) [(‘tile_f’, [4, 1, 4, 32]), (‘tile_y’, [1, 1, 7, 1]), (‘tile_x’, [1, 7, 1, 1]), (‘tile_rc’, [16, 4, 8]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 0)],None,2039594
No: 8 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.01955866813659668, timestamp=1546003014.893228) [(‘tile_f’, [16, 16, 1, 2]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [1, 7, 1, 1]), (‘tile_rc’, [4, 32, 4]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 1500), (‘unroll_explicit’, 0)],None,4344839
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
No: 9 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.02602696418762207, timestamp=1546003015.5407526) [(‘tile_f’, [1, 1, 4, 128]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [7, 1, 1, 1]), (‘tile_rc’, [128, 1, 4]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [1, 1, 3]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6843315
No: 10 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.01750779151916504, timestamp=1546003015.5466475) [(‘tile_f’, [2, 4, 1, 64]), (‘tile_y’, [1, 1, 7, 1]), (‘tile_x’, [1, 7, 1, 1]), (‘tile_rc’, [2, 1, 256]), (‘tile_ry’, [3, 1, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,5411762
No: 11 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.014719486236572266, timestamp=1546003015.5467696) [(‘tile_f’, [2, 4, 4, 16]), (‘tile_y’, [1, 1, 7, 1]), (‘tile_x’, [1, 1, 1, 7]), (‘tile_rc’, [8, 2, 32]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6342777
No: 12 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(’’,), error_no=7, all_cost=200, timestamp=1546003018.0453963) [(‘tile_f’, [2, 8, 1, 32]), (‘tile_y’, [1, 1, 1, 7]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [256, 1, 2]), (‘tile_ry’, [3, 1, 1]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 0)],None,2361008
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
No: 13 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.017080307006835938, timestamp=1546003018.0889275) [(‘tile_f’, [1, 64, 2, 4]), (‘tile_y’, [1, 1, 1, 7]), (‘tile_x’, [1, 7, 1, 1]), (‘tile_rc’, [1, 512, 1]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 0)],None,2162934
No: 14 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(’’,), error_no=7, all_cost=200, timestamp=1546003019.7524147) [(‘tile_f’, [32, 1, 8, 2]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [8, 2, 32]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 1500), (‘unroll_explicit’, 1)],None,9051979
No: 15 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.016710519790649414, timestamp=1546003019.188715) [(‘tile_f’, [32, 8, 2, 1]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [1, 1, 7, 1]), (‘tile_rc’, [8, 16, 4]), (‘tile_ry’, [1, 1, 3]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6278153
No: 16 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.013083219528198242, timestamp=1546003019.1888168) [(‘tile_f’, [8, 8, 8, 1]), (‘tile_y’, [1, 1, 1, 7]), (‘tile_x’, [1, 1, 1, 7]), (‘tile_rc’, [2, 2, 128]), (‘tile_ry’, [3, 1, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,5406530
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
/usr/include/c++/8/bits/stl_vector.h:932: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = char; _Alloc = std::allocator; std::vector<_Tp, _Alloc>::reference = char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion ‘__builtin_expect(__n < this->size(), true)’ failed.
No: 17 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(’’,), error_no=7, all_cost=200, timestamp=1546003022.3853087) [(‘tile_f’, [4, 1, 8, 16]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [1, 7, 1, 1]), (‘tile_rc’, [512, 1, 1]), (‘tile_ry’, [3, 1, 1]), (‘tile_rx’, [1, 1, 3]), (‘auto_unroll_max_step’, 0), (‘unroll_explicit’, 1)],None,6390079
No: 18 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.012630701065063477, timestamp=1546003020.3712323) [(‘tile_f’, [4, 1, 8, 16]), (‘tile_y’, [7, 1, 1, 1]), (‘tile_x’, [7, 1, 1, 1]), (‘tile_rc’, [8, 16, 4]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 0)],None,2017139
No: 19 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(’’,), error_no=7, all_cost=200, timestamp=1546003022.957653) [(‘tile_f’, [32, 8, 1, 2]), (‘tile_y’, [1, 1, 7, 1]), (‘tile_x’, [7, 1, 1, 1]), (‘tile_rc’, [32, 4, 4]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [1, 3, 1]), (‘auto_unroll_max_step’, 1500), (‘unroll_explicit’, 0)],None,4333618
No: 20 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(InstantiationError(‘Skipped because of invalid gpu kernel’),), error_no=1, all_cost=0.016761302947998047, timestamp=1546003021.8294916) [(‘tile_f’, [1, 4, 4, 32]), (‘tile_y’, [1, 7, 1, 1]), (‘tile_x’, [7, 1, 1, 1]), (‘tile_rc’, [4, 4, 32]), (‘tile_ry’, [1, 3, 1]), (‘tile_rx’, [3, 1, 1]), (‘auto_unroll_max_step’, 512), (‘unroll_explicit’, 1)],None,7311456