SegFault in LLVM

Hi, I am working on a conv2d transformation to get better performance for int8 tensors. However, I ran into an unusual segfault. The test case is:

import tvm
from tvm import relay as relay
import numpy as np
import tvm.contrib.graph_runtime as runtime

# Minimal reproducer: an NHWC conv2d whose activation is uint8 and whose
# kernel is int8, compiled for the plain 'llvm' target.
data = relay.var("data", shape=(1, 28, 28, 128), dtype='int8')
kernel = relay.var("w", shape=(3, 3, 128, 256), dtype='int8')

# Shift the int8 input into the uint8 range: widen to int32, add 128,
# clip to [0, 255], then narrow to uint8.
data_expr = relay.cast(data, "int32")
data_expr = relay.add(data_expr, relay.const(128, "int32"))
data_expr = relay.clip(data_expr, a_min=0, a_max=255)
data_expr = relay.cast(data_expr, "uint8")
conv = relay.nn.conv2d(data_expr, kernel, kernel_size=(3, 3), out_dtype='int32', data_layout='NHWC',
        kernel_layout='HWIO')
func = relay.Function([data, kernel], conv)

with relay.build_config(opt_level=0):
    graph, lib, params = relay.build(func, 'llvm', params=None)


# np.random.random_integers is deprecated; np.random.randint has an
# exclusive upper bound, so 11 keeps the original inclusive range [-10, 10].
data = np.random.randint(-10, 11,
                         size=(1, 28, 28, 128)).astype('int8')
weight = np.random.randint(-10, 11,
                           size=(3, 3, 128, 256)).astype('int8')

ctx = tvm.cpu(0)
# Per the backtrace in this report, the segfault is reached from graph
# runtime creation (GraphRuntimeCreate -> ... -> LLVMModuleNode::LazyInitJIT).
module = runtime.create(graph, lib, ctx)
module.set_input('data', data)
# The kernel is a graph input named "w"; feed the generated weights so the
# run does not execute with an unset input.
module.set_input('w', weight)
module.set_input(**params)
module.run()

The GDB backtrace shows:

#0  0x00007fffe73cb214 in llvm::EVT::getExtendedVectorNumElements() const () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#1  0x00007fffe70ba440 in llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt const&, llvm::KnownBits&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#2  0x00007fffe70b96c8 in llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt const&, llvm::KnownBits&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#3  0x00007fffe70b790f in llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::KnownBits&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#4  0x00007fffe6fc0f25 in (anonymous namespace)::DAGCombiner::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&) ()
   from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#5  0x00007fffe6f9d6f9 in (anonymous namespace)::DAGCombiner::visit(llvm::SDNode*) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#6  0x00007fffe6f6facc in (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#7  0x00007fffe6f6f093 in llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::AAResults*, llvm::CodeGenOpt::Level) ()
   from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#8  0x00007fffe70a3b62 in llvm::SelectionDAGISel::CodeGenAndEmitDAG() () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#9  0x00007fffe70a2ce9 in llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#10 0x00007fffe709ffb6 in llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#11 0x00007fffe6dbc3be in (anonymous namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&) ()
   from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#12 0x00007fffe72aa3a4 in llvm::MachineFunctionPass::runOnFunction(llvm::Function&) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#13 0x00007fffe7d658fa in llvm::FPPassManager::runOnFunction(llvm::Function&) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#14 0x00007fffe7d65c83 in llvm::FPPassManager::runOnModule(llvm::Module&) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#15 0x00007fffe7d661ff in llvm::legacy::PassManagerImpl::run(llvm::Module&) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#16 0x00007fffe71d81e8 in llvm::MCJIT::emitObject(llvm::Module*) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#17 0x00007fffe71d8444 in llvm::MCJIT::generateCodeForModule(llvm::Module*) () from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#18 0x00007fffe71d973e in llvm::MCJIT::findSymbol(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool) ()
   from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#19 0x00007fffe71d9208 in llvm::MCJIT::getSymbolAddress(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool) ()
   from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#20 0x00007fffe71d986a in llvm::MCJIT::getGlobalValueAddress(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) ()
   from /home/ubuntu/workplace/t1/tvm/build/libtvm.so
#21 0x00007fffe6386e8b in tvm::codegen::LLVMModuleNode::GetGlobalAddr (this=0x140e8e0, name="__tvm_main__")
    at /home/ubuntu/workplace/t1/tvm/src/codegen/llvm/llvm_module.cc:285
#22 0x00007fffe6386a97 in tvm::codegen::LLVMModuleNode::LazyInitJIT (this=0x140e8e0) at /home/ubuntu/workplace/t1/tvm/src/codegen/llvm/llvm_module.cc:272
#23 0x00007fffe638435c in tvm::codegen::LLVMModuleNode::GetFunction (this=0x140e8e0, name="fused_cast_1", sptr_to_self=
    std::shared_ptr (count 4, weak 0) 0x140e8e0) at /home/ubuntu/workplace/t1/tvm/src/codegen/llvm/llvm_module.cc:65
#24 0x00007fffe5dab7f9 in tvm::runtime::Module::GetFunction (this=0x140daf0, name="fused_cast_1", query_imports=false)
    at /home/ubuntu/workplace/t1/tvm/include/tvm/runtime/packed_func.h:1370
#25 0x00007fffe63f83e7 in tvm::runtime::GraphRuntime::CreateTVMOp (this=0x140d9d0, param=..., args=std::vector of length 2, capacity 2 = {...}, num_inputs=1)
    at /home/ubuntu/workplace/t1/tvm/src/runtime/graph/graph_runtime.cc:399
#26 0x00007fffe63f77a3 in tvm::runtime::GraphRuntime::SetupOpExecs (this=0x140d9d0) at /home/ubuntu/workplace/t1/tvm/src/runtime/graph/graph_runtime.cc:346
#27 0x00007fffe63f4135 in tvm::runtime::GraphRuntime::Init (this=0x140d9d0,
    graph_json="{\n  \"nodes\": [\n    {\n      \"op\": \"null\", \n      \"name\": \"data\", \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"w\", \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"tvm_op\", \n     "..., module=...,
    ctxs=std::vector of length 1, capacity 1 = {...}) at /home/ubuntu/workplace/t1/tvm/src/runtime/graph/graph_runtime.cc:80
#28 0x00007fffe63f98c6 in tvm::runtime::GraphRuntimeCreate (
    sym_json="{\n  \"nodes\": [\n    {\n      \"op\": \"null\", \n      \"name\": \"data\", \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"w\", \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"tvm_op\", \n     "..., m=..., ctxs=std::vector of length 1, capacity 1 = {...})
    at /home/ubuntu/workplace/t1/tvm/src/runtime/graph/graph_runtime.cc:482
#29 0x00007fffe63f9c0f in tvm::runtime::<lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)>::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue *) const (__closure=0x15abf40, args=..., rv=0x7fffffffd450) at /home/ubuntu/workplace/t1/tvm/src/runtime/graph/graph_runtime.cc:512
#30 0x00007fffe63fbbab in std::_Function_handler<void(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::runtime::<lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)> >::_M_invoke(const std::_Any_data &, <unknown type in /home/ubuntu/workplace/t1/tvm/build/libtvm.so, CU 0x4a0b091, DIE 0x4a5ae33>, <unknown type in /home/ubuntu/workplace/t1/tvm/build/libtvm.so, CU 0x4a0b091, DIE 0x4a5ae38>) (__functor=...,
    __args#0=<unknown type in /home/ubuntu/workplace/t1/tvm/build/libtvm.so, CU 0x4a0b091, DIE 0x4a5ae33>,
    __args#1=<unknown type in /home/ubuntu/workplace/t1/tvm/build/libtvm.so, CU 0x4a0b091, DIE 0x4a5ae38>) at /usr/include/c++/5/functional:1871
#31 0x00007fffe5902fac in std::function<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)>::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const (this=0x15abf40, __args#0=..., __args#1=0x7fffffffd450) at /usr/include/c++/5/functional:2267
#32 0x00007fffe5a6d462 in tvm::runtime::PackedFunc::CallPacked (this=0x15abf40, args=..., rv=0x7fffffffd450)
    at /home/ubuntu/workplace/t1/tvm/include/tvm/runtime/packed_func.h:1052
#33 0x00007fffe639b797 in TVMFuncCall (func=0x15abf40, args=0x146e080, arg_type_codes=0x7fffe2625e68, num_args=4, ret_val=0x7fffe2625cd0,
    ret_type_code=0x7fffe26259a0) at /home/ubuntu/workplace/t1/tvm/src/runtime/c_runtime_api.cc:471
#34 0x00007ffff6860e20 in ffi_call_unix64 () from /usr/lib/python3.5/lib-dynload/_ctypes.cpython-35m-x86_64-linux-gnu.so
#35 0x00007ffff686088b in ffi_call () from /usr/lib/python3.5/lib-dynload/_ctypes.cpython-35m-x86_64-linux-gnu.so
#36 0x00007ffff685b01a in _ctypes_callproc () from /usr/lib/python3.5/lib-dynload/_ctypes.cpython-35m-x86_64-linux-gnu.so
#37 0x00007ffff684efcb in ?? () from /usr/lib/python3.5/lib-dynload/_ctypes.cpython-35m-x86_64-linux-gnu.so
#38 0x00000000005c20e7 in PyObject_Call ()
#39 0x000000000053b656 in PyEval_EvalFrameEx ()
#40 0x0000000000540b0b in PyEval_EvalCodeEx ()
#41 0x00000000004ec2e3 in ?? ()
#42 0x00000000005c20e7 in PyObject_Call ()
#43 0x00000000004fbfce in ?? ()
#44 0x00000000005c20e7 in PyObject_Call ()
#45 0x0000000000574db6 in ?? ()
#46 0x00000000005c20e7 in PyObject_Call ()
#47 0x0000000000538cab in PyEval_EvalFrameEx ()
#48 0x000000000053b294 in PyEval_EvalFrameEx ()
#49 0x000000000053fc97 in ?? ()
#50 0x00000000005409bf in PyEval_EvalCode ()
#51 0x000000000060cb42 in ?? ()
#52 0x000000000060efea in PyRun_FileExFlags ()
#53 0x000000000060f7dc in PyRun_SimpleFileExFlags ()
#54 0x0000000000640256 in Py_Main ()
#55 0x00000000004d0001 in main ()

Can somebody help look at this?

@vinx13 @ziheng @tqchen @zhiics @yzhliu

@FrozenGene This is the error that I am getting for quantized MobileNetV2.

Fix - https://github.com/dmlc/tvm/pull/4159

@janimesh @zhiics

I’ve also seen the segfault from getExtendedVectorNumElements. As mentioned in the PR above, the target “llvm” segfaults, but “llvm -mcpu=avx2” doesn’t. Moreover, it seems the segfault only occurs with LLVM 9; if I use LLVM 8, even the target “llvm” works fine.

When LLVM 10 is out (which should be soon), I’ll see if this issue has been fixed.

1 Like