[Codegen] How to convert a "C" runtime module to an LLVM module?

Hi, I have created a “C” runtime module using an external codegen. Now I am looking to cross-compile this module (.cc file) using the LLVM toolchain.

Is it possible to convert a “C” module to an LLVM module?

Is there any other way I can cross-compile the output of the external codegen with the current code?

Can anyone please give me a hint in this area?

Shouldn’t it work to just use clang to compile your C code to LLVM IR?
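
For reference, a minimal sketch of that idea, driving clang from Python to lower a generated .cc file to LLVM IR for an ARM target. The file names, target triple, and include paths below are placeholders, not anything produced by TVM itself:

    import subprocess

    # Sketch only: compile the external-codegen C file to LLVM IR with clang.
    # "lib1.cc", the target triple and the -I paths are placeholders to adapt.
    cmd = [
        "clang", "-S", "-emit-llvm",
        "--target=arm-none-eabi",
        "-I", "/path/to/tvm/include",                   # TVM runtime headers
        "-I", "/path/to/tvm/3rdparty/dlpack/include",   # DLPack headers
        "-o", "lib1.ll",
        "lib1.cc",
    ]
    subprocess.run(cmd, check=True)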

https://docs.tvm.ai/dev/introduction_to_module_serialization.html

I think the tutorial may help you.

The module will be compiled into a *.o with LLVM by default if you enable LLVM. That way, you can compile on your PC and then copy the compiled module to your target manually or via RPC. Your target runtime only needs to get the function interface from the *.so.
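
As a rough sketch of that flow (hedged: the toy Relay model, target triple, toolchain, host and port are placeholders, and cc.cross_compiler is assumed to be available in your TVM version):

    import tvm
    from tvm import relay, rpc
    from tvm.contrib import cc

    # Build a toy model on the PC for an ARM target (placeholder triple).
    x = relay.var("x", shape=(1, 8), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], relay.nn.relu(x)))
    graph, lib, params = relay.build(mod, target="llvm -target=arm-linux-gnueabihf")

    # Cross-compile the llvm .o (and any imported C sources) into one .so.
    lib.export_library("deploy.so",
                       fcompile=cc.cross_compiler("arm-linux-gnueabihf-g++"))

    # Ship it to the device over RPC; the target only needs the TVM runtime.
    remote = rpc.connect("192.168.1.10", 9090)   # placeholder host/port
    remote.upload("deploy.so")
    rlib = remote.load_module("deploy.so")

export_library itself (quoted below) is what decides, per module, whether to emit a .o or a .cc before handing everything to fcompile: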

    def export_library(self,
                       file_name,
                       fcompile=None,
                       **kwargs):
        """Export the module and its imported device code one library.

        This function only works on host llvm modules.
        It will pack all the imported modules

        Parameters
        ----------
        file_name : str
            The name of the shared library.

        fcompile : function(target, file_list, kwargs), optional
            Compilation function to use to create the dynamic library.
            If fcompile has the attribute object_format, the host library will be
            compiled to that format. Otherwise, the default format "o" is used.

        kwargs : dict, optional
            Additional arguments passed to fcompile
        """
        # NOTE: this function depends on contrib library features
        # which are only available when TVM is not built in runtime-only mode.
        if _RUNTIME_ONLY:
            raise RuntimeError("Cannot call export_library in runtime only mode")
        # Extra dependencies during runtime.
        from pathlib import Path
        from tvm.contrib import cc as _cc, tar as _tar, util as _util

        if isinstance(file_name, Path):
            file_name = str(file_name)

        if self.type_key == "stackvm":
            if not file_name.endswith(".stackvm"):
                raise ValueError("Module[%s]: can only be saved as stackvm format."
                                 "did you build with LLVM enabled?" % self.type_key)
            self.save(file_name)
            return

        modules = self._collect_dso_modules()
        temp = _util.tempdir()
        files = []
        is_system_lib = False
        has_c_module = False
        llvm_target_triple = None
        for index, module in enumerate(modules):
            if fcompile is not None and hasattr(fcompile, "object_format"):
                object_format = fcompile.object_format
            else:
                if module.type_key == "llvm":
                    object_format = "o"
                else:
                    assert module.type_key == "c"
                    object_format = "cc"
                    has_c_module = True
            path_obj = temp.relpath("lib" + str(index) + "." + object_format)
            module.save(path_obj)
            files.append(path_obj)
            is_system_lib = (module.type_key == "llvm" and
                             module.get_function("__tvm_is_system_module")())
            llvm_target_triple = (module.type_key == "llvm" and
                                  module.get_function("_get_target_triple")())
        if not fcompile:
            if file_name.endswith(".tar"):
                fcompile = _tar.tar
            else:
                fcompile = _cc.create_shared

        if llvm_target_triple is None and hasattr(fcompile, "get_target_triple"):
            llvm_target_triple = fcompile.get_target_triple()

        if self.imported_modules:
            if enabled("llvm") and llvm_target_triple:
                path_obj = temp.relpath("devc.o")
                m = _ffi_api.ModulePackImportsToLLVM(self, is_system_lib, llvm_target_triple)
                m.save(path_obj)
                files.append(path_obj)
            else:
                path_cc = temp.relpath("devc.cc")
                with open(path_cc, "w") as f:
                    f.write(_ffi_api.ModulePackImportsToC(self, is_system_lib))
                files.append(path_cc)

        if has_c_module:
            options = []
            if "options" in kwargs:
                opts = kwargs["options"]
                options = opts if isinstance(opts, (list, tuple)) else [opts]
            opts = options + ["-I" + path for path in find_include_path()]
            kwargs.update({'options': opts})

        fcompile(file_name, files, **kwargs)
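
To make the fcompile hooks above concrete, here is a hedged sketch of a hand-rolled fcompile driving a cross toolchain; export_library() calls it as fcompile(file_name, files, **kwargs), and the optional get_target_triple attribute is the one consulted when packing imported modules with LLVM. The toolchain name and flags are placeholders:

    import subprocess

    def arm_fcompile(output, objects, options=None):
        # Link the intermediate files (llvm .o plus external-codegen .cc)
        # into one shared library with a cross toolchain (placeholder name).
        cmd = ["arm-linux-gnueabihf-g++", "-shared", "-fPIC", "-o", output]
        cmd += [objects] if isinstance(objects, str) else list(objects)
        if options:
            cmd += list(options)
        subprocess.run(cmd, check=True)

    # Optional hook used above to pick the triple for ModulePackImportsToLLVM.
    arm_fcompile.get_target_triple = lambda: "arm-linux-gnueabihf"

    # Hypothetical usage, where lib comes from relay.build():
    # lib.export_library("deploy.so", fcompile=arm_fcompile)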

def load_module(path, fmt=""):
    """Load module from file.

    Parameters
    ----------
    path : str
        The path to the module file.

    fmt : str, optional
        The format of the file, if not specified
        it will be inferred from suffix of the file.

    Returns
    -------
    module : runtime.Module
        The loaded module

    Note
    ----
    This function will automatically call
    cc.create_shared if the path is in format .o or .tar
    """
    # High level handling for .o and .tar file.
    # We support this to be consistent with RPC module load.
    if path.endswith(".o"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc
        _cc.create_shared(path + ".so", path)
        path += ".so"
    elif path.endswith(".tar"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc, util as _util, tar as _tar
        tar_temp = _util.tempdir(custom_path=path.replace('.tar', ''))
        _tar.untar(path, tar_temp.temp_dir)
        files = [tar_temp.relpath(x) for x in tar_temp.listdir()]
        _cc.create_shared(path + ".so", files)
        path += ".so"
    # TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files
    elif path.endswith(".obj"):
        fmt = "micro_dev"
    # Redirect to the load API
    return _ffi_api.ModuleLoadFromFile(path, fmt)
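
And on the deployment side, a tiny usage sketch (the library path and the kernel symbol name are hypothetical):

    import tvm

    # Load the cross-compiled library on the target and look up a packed
    # function by its symbol name (placeholder symbol shown).
    lib = tvm.runtime.load_module("deploy.so")
    f = lib.get_function("fused_nn_relu")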

The functions that the graph runtime looks up from module.so correspond to the primitive name section in the Relay VM. The following is from the official documentation.

For kernels, we can conveniently leverage existing TVM infra to save and load the compiled library module. Here we only focus on serializing the several other components in a binary format that is organized with the following sections in order.

Global section. This section contains the globals (function names) used by the virtual machine.

Constant section. This section is used to store the constant pool (i.e. weights of the model) for a virtual machine.

Primitive name section. This section is introduced to accommodate the list of primitive operator names that will be invoked by the virtual machine, i.e. the names starting with fused_. The primitive names are used as symbols to look up function pointers in the compiled kernel library.

Code section. The VM functions, including bytecode, are sitting in this section. The dispatching loop iterates through this section to fetch instructions for execution.

Hence, unlike the graph runtime artifact that contains weight (.params), graph json (.json), and compiled kernel library (.so), the serialized executable artifact is composed of the Relay object file (.ro) and the compiled kernel library (.so).
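
A sketch of how those pieces are produced and consumed, loosely following that document (exact API locations may differ across TVM versions):

    import tvm
    from tvm import relay
    from tvm.runtime import vm as _vm

    # Compile a toy Relay module for the VM.
    x = relay.var("x", shape=(4,), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], x + x))
    exe = relay.vm.compile(mod, target="llvm")

    # Split the executable: bytecode/globals/constants vs. kernel library.
    code, lib = exe.save()
    lib.export_library("vm_kernels.so")
    with open("code.ro", "wb") as fo:
        fo.write(code)

    # Reload both parts and reconstruct the executable.
    loaded_lib = tvm.runtime.load_module("vm_kernels.so")
    loaded_code = bytearray(open("code.ro", "rb").read())
    des_exe = _vm.Executable.load_exec(loaded_code, loaded_lib)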

@hht, I have tried a similar approach, but there is a problem cross-compiling the final module.

I will explain the scenario in more detail: I am trying to use an external codegen (an implementation similar to dnnl) to generate code for my model test case, and the modules have to be cross-compiled. I used “llvm -target=arm-none-eabi” in relay.build(). When I do export_library(), there are two modules: one is an llvm module and the other is a C module (generated by the external codegen), as shown below:

############ Module: Module(llvm, 1d78ff8)                                           
index: 0                                                                             
fcompile: False                                                                      
has_c_module: False                                                                  
object_format: o                                                                     
path_obj: /tmp/tmpil2gqvka/lib0.o                                                    
files: ['/tmp/tmpil2gqvka/lib0.o']                                                   
############ Module: Module(c, 1c87658)                                              
index: 1                                                                             
fcompile: False                                                                      
has_c_module: True                                                                   
object_format: cc                                                                    
path_obj: /tmp/tmpil2gqvka/lib1.cc                                                   
files: ['/tmp/tmpil2gqvka/lib0.o', '/tmp/tmpil2gqvka/lib1.cc']   

When I use the cc.cross_compile() with the gcc compiler everything works fine, but I am facing a problem when I compile using the ARM compiler.

In this case, my understanding is that if I can convert the second module (the C module) to an LLVM module (LLVM ARM), then I can compile both with LLVM, e.g. via lib.save().

Could you please check whether my method is correct, or is there an alternative way of doing the same?

Thanks and Regards, Albin

I think it may have nothing to do with the imported C module. When compiling these modules, the program first stores the .o or .cc files into /tmp according to the module.type_key, and then it just compiles these files together.

        if has_c_module:
            options = []
            if "options" in kwargs:
                opts = kwargs["options"]
                options = opts if isinstance(opts, (list, tuple)) else [opts]
            opts = options + ["-I" + path for path in find_include_path()]
            kwargs.update({'options': opts})

        fcompile(file_name, files, **kwargs)

I understand your point, but can we perform LLVM ARM compilation (cross-compiling) at this stage instead of gcc compilation?

I see. You mean everything works fine with arm-none-eabi-gcc as the cross compiler?

Sorry, maybe my explanation confused you. I mean everything works fine with the Linux gcc compiler:

'g++', '-shared', '-fPIC', '-o', '/tmp/tmp0spgzwur/lib.so', '/tmp/tmpil2gqvka/lib0.o', '/tmp/tmpil2gqvka/lib1.cc', '/tmp/tmpil2gqvka/devc.cc', '-O2', '-std=c++11' .......

But if I have to cross-compile, e.g. compile with llvm -target=arm-none-eabi, how can we do this in my case, where one module is llvm and the other is C?

It is beyond my ability. If you change the cmd manually, it may cause new problems. I am sorry.

import subprocess
import sys

from tvm._ffi.base import py_str


def _linux_compile(output, objects, options, compile_cmd="g++"):
    """Build a shared library (or object file) from objects with compile_cmd."""
    cmd = [compile_cmd]
    if output.endswith(".so") or output.endswith(".dylib"):
        cmd += ["-shared", "-fPIC"]
        if sys.platform == "darwin":
            cmd += ["-undefined", "dynamic_lookup"]
    elif output.endswith(".obj"):
        cmd += ["-c"]
    cmd += ["-o", output]
    if isinstance(objects, str):
        cmd += [objects]
    else:
        cmd += objects
    if options:
        cmd += options
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (out, _) = proc.communicate()
    if proc.returncode != 0:
        msg = "Compilation error:\n"
        msg += py_str(out)
        raise RuntimeError(msg)
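
For completeness, the compiler that ends up in compile_cmd can be steered from Python rather than by editing the command by hand. A hedged sketch, assuming create_shared in tvm.contrib.cc exposes a compiler argument in your version; the toolchain name and file list are placeholders:

    from tvm.contrib import cc

    # Placeholder file list: the lib0.o / lib1.cc / devc.cc that export_library
    # left in its temporary directory.
    files = ["lib0.o", "lib1.cc", "devc.cc"]
    cc.create_shared("deploy.so", files,
                     options=["-std=c++11"],
                     cc="arm-linux-gnueabihf-g++")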