llc 命令用于将 LLVM 源输入编译成指定架构的汇编语言;随后,输出的汇编代码可以交给本机汇编器和链接器,生成本机可执行文件。输出汇编代码的目标架构默认根据输入文件自动确定,除非使用 -march 选项覆盖该默认值。
-O=uint //指定生成代码时的优化级别,对应clang的 -O0、-O1、-O2、-O3
-march=<arch> //指定生成汇编的目标架构
-filetype=<output file type> //指定输出类型,.s汇编或.o目标文件
具体使用及配置可以到官网查看。
以下是对其源码的解析:
int main(int argc, char **argv) {
//初始化,作用:1。设置一个信号处理程序,以便在进程崩溃时追踪堆栈,2.创建全局的handler用于当内存分配失败时调用它 3.在Windows平台可通过命令行获取utf-8编码参数
InitLLVM X(argc, argv);
// Enable debug stream buffering.
EnableDebugBuffering = true;
// 上下文,拥有并管理了LLVM core内部的全局数据,包括类型和常量表。但LLVMContext本身不提供锁保证,所以应该注意每个线程对应一个上下文
LLVMContext Context;
// Initialize targets first, so that --version shows registered targets.
// 初始化目标机器,初始化对应机器码,以及支持asm打印 具体类型可查看Targets.def
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
// PassRegistry用于注册和初始化pass子系统,并帮助PassManager解析pass依赖关系。 初始化 codegen 以及 IR passes 以便 -print-after,
// -print-before, and -stop-after 选项正常工作.
PassRegistry *Registry = PassRegistry::getPassRegistry();
initializeCore(*Registry);
initializeCodeGen(*Registry);
initializeLoopStrengthReducePass(*Registry);
initializeLowerIntrinsicsPass(*Registry);
initializeEntryExitInstrumenterPass(*Registry);
initializePostInlineEntryExitInstrumenterPass(*Registry);
initializeUnreachableBlockElimLegacyPassPass(*Registry);
initializeConstantHoistingLegacyPassPass(*Registry);
initializeScalarOpts(*Registry);
initializeVectorization(*Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
initializeExpandReductionsPass(*Registry);
initializeHardwareLoopsPass(*Registry);
initializeTransformUtils(*Registry);
// Initialize debugging passes.
initializeScavengerTestPass(*Registry);
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
// 设置除GlobalValue之外的Value是否保留name
Context.setDiscardValueNames(DiscardValueNames);
// Set a diagnostic handler that doesn't exit on the first error
bool HasError = false;
Context.setDiagnosticHandler(
std::make_unique<LLCDiagnosticHandler>(&HasError));
Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError);
Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
RemarksFormat, RemarksWithHotness,
RemarksHotnessThreshold);
if (Error E = RemarksFileOrErr.takeError()) {
WithColor::error(errs(), argv[0]) << toString(std::move(E)) << '\n';
return 1;
}
std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
// 判断input是否正确
if (InputLanguage != "" && InputLanguage != "ir" &&
InputLanguage != "mir") {
WithColor::error(errs(), argv[0])
<< "input language must be '', 'IR' or 'MIR'\n";
return 1;
}
// 重复编译N次来给出更好的时间指标
for (unsigned I = TimeCompilations; I; --I)
// 编译子模块, 具体实现在下一段
if (int RetVal = compileModule(argv, Context))
return RetVal;
if (RemarksFile)
// 指示此输出文件的工具作业已成功,并且不应删除该文件
RemarksFile->keep();
return 0;
}
/// Loads the input named by InputFilename (IR or MIR), builds the target
/// machine and the pass pipeline for it, runs the pipeline and writes the
/// result to the selected output stream.
///
/// \param argv     Program arguments; only argv[0] is used, as the prefix of
///                 diagnostics.
/// \param Context  Context the module is parsed into; its diagnostic handler
///                 is inspected after the passes have run.
/// \returns 0 on success (including the path where -mcpu/-mattr is "help" and
///          no module is loaded), 1 on failure. Failures detected inside the
///          data-layout callback terminate the process with exit(1) instead.
static int compileModule(char **argv, LLVMContext &Context) {
  // Load the module to be compiled...
  // Receives the diagnostic when parsing fails.
  SMDiagnostic Err;
  std::unique_ptr<Module> M;
  // Parser used for MIR input; stays null when the input is parsed as IR.
  std::unique_ptr<MIRParser> MIR;
  // Triple of the target being compiled for.
  Triple TheTriple;
  std::string CPUStr = codegen::getCPUStr(),
              FeaturesStr = codegen::getFeaturesStr();

  // Applies the command-line CPU/feature settings to functions loaded from
  // MIR.
  auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) {
    codegen::setFunctionAttributes(CPUStr, FeaturesStr, F);
  };

  auto MAttrs = codegen::getMAttrs();
  bool SkipModule = codegen::getMCPU() == "help" ||
                    (!MAttrs.empty() && MAttrs.front() == "help");

  // Map the -O option character to a codegen optimization level.
  CodeGenOpt::Level OLvl = CodeGenOpt::Default;
  switch (OptLevel) {
  default:
    WithColor::error(errs(), argv[0]) << "invalid optimization level.\n";
    return 1;
  case ' ': break;
  case '0': OLvl = CodeGenOpt::None; break;
  case '1': OLvl = CodeGenOpt::Less; break;
  case '2': OLvl = CodeGenOpt::Default; break;
  case '3': OLvl = CodeGenOpt::Aggressive; break;
  }

  // Target options, filled in from the command-line flags once the triple is
  // known.
  TargetOptions Options;
  auto InitializeOptions = [&](const Triple &TheTriple) {
    Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
    Options.DisableIntegratedAS = NoIntegratedAssembler;
    Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
    Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory;
    Options.MCOptions.AsmVerbose = AsmVerbose;
    Options.MCOptions.PreserveAsmComments = PreserveComments;
    Options.MCOptions.IASSearchPaths = IncludeDirs;
    Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
  };

  Optional<Reloc::Model> RM = codegen::getExplicitRelocModel();

  const Target *TheTarget = nullptr;
  std::unique_ptr<TargetMachine> Target;

  // If the user only wants the list of available options, skip loading the
  // module.
  if (!SkipModule) {
    // Callback handed to the parsers: picks the triple, looks up the target,
    // creates the target machine and returns its data layout string. It
    // cannot return an error code, so failures end the process via exit(1).
    auto SetDataLayout =
        [&](StringRef DataLayoutTargetTriple) -> Optional<std::string> {
      // If we are supposed to override the target triple, do so now.
      std::string IRTargetTriple = DataLayoutTargetTriple.str();
      if (!TargetTriple.empty())
        IRTargetTriple = Triple::normalize(TargetTriple);
      TheTriple = Triple(IRTargetTriple);
      if (TheTriple.getTriple().empty())
        TheTriple.setTriple(sys::getDefaultTargetTriple());

      // Look the target up by the -march name as well as the triple, because
      // a backend may have no mapping from the target triple.
      std::string Error;
      TheTarget =
          TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
      if (!TheTarget) {
        WithColor::error(errs(), argv[0]) << Error;
        exit(1);
      }

      // On AIX, setting the relocation model to anything other than PIC is
      // considered a user error.
      if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
        WithColor::error(errs(), argv[0])
            << "invalid relocation model, AIX only supports PIC.\n";
        exit(1);
      }

      // Create the target machine from the collected configuration.
      InitializeOptions(TheTriple);
      Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
          TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
          codegen::getExplicitCodeModel(), OLvl));
      assert(Target && "Could not allocate target machine!");

      return Target->createDataLayout().getStringRepresentation();
    };

    // Parse the input into a Module according to its format: MIR when asked
    // for explicitly or when the file name ends in ".mir", IR otherwise.
    if (InputLanguage == "mir" ||
        (InputLanguage == "" && StringRef(InputFilename).endswith(".mir"))) {
      MIR = createMIRParserFromFile(InputFilename, Err, Context,
                                    setMIRFunctionAttributes);
      if (MIR)
        M = MIR->parseIRModule(SetDataLayout);
    } else {
      M = parseIRFile(InputFilename, Err, Context, SetDataLayout);
    }
    if (!M) {
      Err.print(argv[0], WithColor::error(errs(), argv[0]));
      return 1;
    }
    if (!TargetTriple.empty())
      M->setTargetTriple(Triple::normalize(TargetTriple));
  } else {
    // The "help" case: no module is loaded.
    TheTriple = Triple(Triple::normalize(TargetTriple));
    if (TheTriple.getTriple().empty())
      TheTriple.setTriple(sys::getDefaultTargetTriple());

    // Look the target up by the -march name and the triple.
    std::string Error;
    TheTarget =
        TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
    if (!TheTarget) {
      WithColor::error(errs(), argv[0]) << Error;
      return 1;
    }

    // On AIX, setting the relocation model to anything other than PIC is
    // considered a user error. (AIX is IBM's proprietary UNIX operating
    // system.)
    if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
      WithColor::error(errs(), argv[0])
          << "invalid relocation model, AIX only supports PIC.\n";
      return 1;
    }

    // Create the target machine from the collected configuration.
    InitializeOptions(TheTriple);
    Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
        TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
        codegen::getExplicitCodeModel(), OLvl));
    assert(Target && "Could not allocate target machine!");

    // If we don't have a module then just exit now. We do this down
    // here since the CPU/Feature help is underneath the target machine
    // creation.
    return 0;
  }

  assert(M && "Should have exited if we didn't have a module!");
  if (codegen::getFloatABIForCalls() != FloatABI::Default)
    Options.FloatABIType = codegen::getFloatABIForCalls();

  // Figure out where we are going to send the output.
  std::unique_ptr<ToolOutputFile> Out =
      GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]);
  if (!Out) return 1;

  // Optional separate output file for split DWARF, opened only when a file
  // name was given.
  std::unique_ptr<ToolOutputFile> DwoOut;
  if (!SplitDwarfOutputFile.empty()) {
    std::error_code EC;
    DwoOut = std::make_unique<ToolOutputFile>(SplitDwarfOutputFile, EC,
                                              sys::fs::OF_None);
    if (EC) {
      WithColor::error(errs(), argv[0]) << EC.message() << '\n';
      return 1;
    }
  }

  // Core of the function.
  // Build up all of the passes that we want to do to the module.
  legacy::PassManager PM;

  // Add an appropriate TargetLibraryInfo pass for the module's triple.
  TargetLibraryInfoImpl TLII(Triple(M->getTargetTriple()));

  // The -disable-simplify-libcalls flag actually disables all builtin optzns.
  if (DisableSimplifyLibCalls)
    TLII.disableAllFunctions();
  PM.add(new TargetLibraryInfoWrapperPass(TLII));

  // Verify the module right away so that problems are caught before any pass
  // has its doInitialization() called.
  if (!NoVerify && verifyModule(*M, &errs())) {
    std::string Prefix =
        (Twine(argv[0]) + Twine(": ") + Twine(InputFilename)).str();
    WithColor::error(errs(), Prefix) << "input module is broken!\n";
    return 1;
  }

  // Override function attributes based on CPUStr, FeaturesStr, and command line
  // flags.
  codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);

  // Warn when -mc-relax-all was given but the output is not an object file.
  // The text is written to a WithColor::warning() stream, so it carries no
  // "warning:" prefix of its own, and it terminates its own line.
  if (mc::getExplicitRelaxAll() && codegen::getFileType() != CGFT_ObjectFile)
    WithColor::warning(errs(), argv[0])
        << "ignoring -mc-relax-all because filetype != obj\n";

  {
    raw_pwrite_stream *OS = &Out->os();

    // Manually do the buffering rather than using buffer_ostream,
    // so we can memcmp the contents in CompileTwice mode
    SmallVector<char, 0> Buffer;
    std::unique_ptr<raw_svector_ostream> BOS;
    if ((codegen::getFileType() != CGFT_AssemblyFile &&
         !Out->os().supportsSeeking()) ||
        CompileTwice) {
      BOS = std::make_unique<raw_svector_ostream>(Buffer);
      OS = BOS.get();
    }

    const char *argv0 = argv[0];
    // The target machine is used through its LLVMTargetMachine interface
    // from here on.
    LLVMTargetMachine &LLVMTM = static_cast<LLVMTargetMachine &>(*Target);
    MachineModuleInfoWrapperPass *MMIWP =
        new MachineModuleInfoWrapperPass(&LLVMTM);

    // If custom passes were requested, construct a custom pass pipeline that
    // starts after instruction selection.
    if (!RunPassNames->empty()) {
      if (!MIR) {
        WithColor::warning(errs(), argv[0])
            << "run-pass is for .mir file only.\n";
        // MMIWP has not been added to PM on this path; free it rather than
        // leak it.
        delete MMIWP;
        return 1;
      }
      // Target-independent configuration used to pass the options on to the
      // other CodeGen passes.
      TargetPassConfig *PassConfig = LLVMTM.createPassConfig(PM);
      TargetPassConfig &TPC = *PassConfig;
      if (TPC.hasLimitedCodeGenPipeline()) {
        WithColor::warning(errs(), argv[0])
            << "run-pass cannot be used with "
            << TPC.getLimitedCodeGenPipelineReason(" and ") << ".\n";
        // Neither object has been added to PM yet; free both before leaving.
        delete PassConfig;
        delete MMIWP;
        return 1;
      }

      TPC.setDisableVerify(NoVerify);
      PM.add(&TPC);
      PM.add(MMIWP);
      // Dump and verify the machine functions.
      TPC.printAndVerify("");
      for (const std::string &RunPassName : *RunPassNames) {
        if (addPass(PM, argv0, RunPassName, TPC))
          return 1;
      }
      // Mark the pass configuration as complete.
      TPC.setInitialized();
      // Print the result in serialized MIR form.
      PM.add(createPrintMIRPass(*OS));
      // Free the memory held by the MachineFunctions.
      PM.add(createFreeMachineFunctionPass());
    // Otherwise add the passes that emit the requested file type.
    } else if (Target->addPassesToEmitFile(
                   PM, *OS, DwoOut ? &DwoOut->os() : nullptr,
                   codegen::getFileType(), NoVerify, MMIWP)) {
      WithColor::warning(errs(), argv[0])
          << "target does not support generation of this"
          << " file type!\n";
      return 1;
    }

    const_cast<TargetLoweringObjectFile *>(LLVMTM.getObjFileLowering())
        ->Initialize(MMIWP->getMMI().getContext(), *Target);
    if (MIR) {
      assert(MMIWP && "Forgot to create MMIWP?");
      if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
        return 1;
    }

    // Before executing passes, print the final values of the LLVM options.
    cl::PrintOptionValues();

    // If requested, run the pass manager over the same module again,
    // to catch any bugs due to persistent state in the passes. Note that
    // opt has the same functionality, so it may be worth abstracting this out
    // in the future.
    SmallVector<char, 0> CompileTwiceBuffer;
    if (CompileTwice) {
      std::unique_ptr<Module> M2(llvm::CloneModule(*M));
      PM.run(*M2);
      CompileTwiceBuffer = Buffer;
      Buffer.clear();
    }

    PM.run(*M);

    // Read back the error flag through the handler installed on the context.
    auto HasError =
        static_cast<const LLCDiagnosticHandler *>(Context.getDiagHandlerPtr())
            ->HasError;
    if (*HasError)
      return 1;

    // Compare the two outputs and make sure they're the same
    if (CompileTwice) {
      if (Buffer.size() != CompileTwiceBuffer.size() ||
          (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) !=
           0)) {
        errs()
            << "Running the pass manager twice changed the output.\n"
               "Writing the result of the second run to the specified output\n"
               "To generate the one-run comparison binary, just run without\n"
               "the compile-twice option\n";
        Out->os() << Buffer;
        Out->keep();
        return 1;
      }
    }

    // Flush the manually buffered output to the real stream.
    if (BOS) {
      Out->os() << Buffer;
    }
  }

  // Declare success.
  Out->keep();
  if (DwoOut)
    DwoOut->keep();

  return 0;
}