LLVM: add option to emit asm file to aid debugging

Defining ENABLE_RR_EMIT_ASM_FILE makes the LLVM backend output a unique
asm file for each routine it generates. This file is then post-processed
so that each instruction is prefixed with its final resolved memory
address.

This is useful, for instance, when we get JIT callstacks and can't
easily figure out which code they map to. Furthermore, when this feature
is coupled with ENABLE_RR_DEBUG_INFO, the emitted asm includes source
location (file and line) information, making it easy to correlate the
asm to the Reactor code.

For example, running ReactorUnitTests.Sample with both
ENABLE_RR_EMIT_ASM_FILE and ENABLE_RR_DEBUG_INFO enabled generates a
file named swiftshader_jit_llvm_0000_ReactorUnitTests_Sample.asm, with
partial output like so:

```
	.file	2 "C:\\src\\SwiftShader2\\tests\\ReactorUnitTests\\ReactorUnitTests.cpp\\<unknown>"
	.loc	2 53 0 prologue_end     # <unknown>:53:0
[0x2B9D3358004] 	mov	qword ptr [rsp + 64], rcx # encoding: [0x48,0x89,0x4c,0x24,0x40]
[0x2B9D3358009] 	mov	qword ptr [rsp + 72], rcx # encoding: [0x48,0x89,0x4c,0x24,0x48]
	.loc	2 54 0                  # <unknown>:54:0
[0x2B9D335800E] 	mov	eax, dword ptr [rcx - 4] # encoding: [0x8b,0x41,0xfc]
[0x2B9D3358011] 	mov	dword ptr [rsp + 8], eax # encoding: [0x89,0x44,0x24,0x08]
[0x2B9D3358015] 	mov	dword ptr [rsp + 16], eax # encoding: [0x89,0x44,0x24,0x10]
	.loc	2 55 0                  # <unknown>:55:0
[0x2B9D3358019] 	mov	dword ptr [rsp + 12], edx # encoding: [0x89,0x54,0x24,0x0c]
[0x2B9D335801D] 	mov	dword ptr [rsp + 20], edx # encoding: [0x89,0x54,0x24,0x14]
```

The "53", "54", and "55" are the line numbers of the respective Reactor
code in ReactorUnitTests.cpp.

CMake: set REACTOR_EMIT_ASM_FILE to enable this feature.

Bug: b/174358505
Change-Id: I613a25fe0354a1343c49cb399875e82d5e806e29
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/50750
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9268095..ebe5246 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -139,6 +139,7 @@
 option_if_not_defined(SWIFTSHADER_DCHECK_ALWAYS_ON "Check validation macros even in release builds" FALSE)
 option_if_not_defined(REACTOR_EMIT_DEBUG_INFO "Emit debug info for JIT functions" FALSE)
 option_if_not_defined(REACTOR_EMIT_PRINT_LOCATION "Emit printing of location info for JIT functions" FALSE)
+option_if_not_defined(REACTOR_EMIT_ASM_FILE "Emit asm files for JIT functions" FALSE)
 option_if_not_defined(REACTOR_ENABLE_PRINT "Enable RR_PRINT macros" FALSE)
 option_if_not_defined(REACTOR_VERIFY_LLVM_IR "Check reactor-generated LLVM IR is valid even in release builds" FALSE)
 option_if_not_defined(SWIFTSHADER_LESS_DEBUG_INFO "Generate less debug info to reduce file size" FALSE)
@@ -566,6 +567,10 @@
     list(APPEND SWIFTSHADER_COMPILE_OPTIONS "-DENABLE_RR_EMIT_PRINT_LOCATION")
 endif()
 
+if(REACTOR_EMIT_ASM_FILE)
+    list(APPEND SWIFTSHADER_COMPILE_OPTIONS "-DENABLE_RR_EMIT_ASM_FILE")
+endif()
+
 if(REACTOR_EMIT_DEBUG_INFO)
     message(WARNING "REACTOR_EMIT_DEBUG_INFO is enabled. This will likely affect performance.")
     list(APPEND SWIFTSHADER_COMPILE_OPTIONS "-DENABLE_RR_DEBUG_INFO")
diff --git a/src/Reactor/CMakeLists.txt b/src/Reactor/CMakeLists.txt
index ea450d7..0717fb9 100644
--- a/src/Reactor/CMakeLists.txt
+++ b/src/Reactor/CMakeLists.txt
@@ -43,6 +43,8 @@
 set(LLVM_SRC_FILES
     CPUID.cpp
     CPUID.hpp
+    LLVMAsm.cpp
+    LLVMAsm.hpp
     LLVMJIT.cpp
     LLVMReactor.cpp
     LLVMReactor.hpp
diff --git a/src/Reactor/LLVMAsm.cpp b/src/Reactor/LLVMAsm.cpp
new file mode 100644
index 0000000..e7b19e6
--- /dev/null
+++ b/src/Reactor/LLVMAsm.cpp
@@ -0,0 +1,180 @@
+// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "LLVMAsm.hpp"
+
+#ifdef ENABLE_RR_EMIT_ASM_FILE
+
+#	include "Debug.hpp"
+#	include "llvm/IR/LegacyPassManager.h"
+#	include "llvm/Support/CommandLine.h"
+#	include <fstream>
+#	include <iomanip>
+#	include <regex>
+#	include <sstream>
+
+namespace {
+// Sets LLVM's global command-line options for asm output. The parsing happens in the initializer of
+// a function-local static, so it runs only once and is thread safe, however often this is called.
+bool initAsmOutputOptionsOnce()
+{
+	static const bool optionsParsed = [] {
+		// Select Intel syntax for x86 asm rather than the default AT&T
+		const char *args[] = { "Reactor", "-x86-asm-syntax", "intel" };
+		const int argCount = sizeof(args) / sizeof(args[0]);
+		llvm::cl::ParseCommandLineOptions(argCount, args);
+		return true;
+	}();
+
+	return optionsParsed;
+}
+}  // namespace
+
+namespace rr {
+namespace AsmFile {
+
+// Returns a unique file name: a fixed prefix, a zero-padded call counter, then routineName with file-name-invalid characters replaced by '_'.
+std::string generateFilename(std::string routineName)
+{
+	static const std::string invalidChars = "/\\:*?\"<>|";  // '/' occurs in gtest names; the rest are rejected by Windows
+	std::replace_if(routineName.begin(), routineName.end(), [](char c) { return invalidChars.find(c) != std::string::npos; }, '_');
+	static size_t counter = 0;  // NOTE(review): not synchronized; confirm routines are never generated concurrently
+	std::stringstream f;
+	f << "swiftshader_jit_llvm_" << std::setfill('0') << std::setw(4) << counter++ << "_" << routineName << ".asm";
+	return f.str();
+}
+
+// Writes the assembly LLVM generates for `module` to `filename`, with each instruction's encoding in
+// a comment. Returns false if the target machine, the output file or the emit passes can't be set up.
+bool emitAsmFile(const std::string &filename, llvm::orc::JITTargetMachineBuilder builder, llvm::Module &module)
+{
+	initAsmOutputOptionsOnce();
+	auto targetMachine = llvm::expectedToOptional(builder.createTargetMachine());
+	if(!targetMachine)
+		return false;  // The creation error was consumed by expectedToOptional(), as llvm::Expected requires
+	std::error_code EC;
+	llvm::raw_fd_ostream dest(filename, EC, llvm::sys::fs::OF_None);
+	ASSERT(!EC);
+	auto &options = (*targetMachine)->Options.MCOptions;
+	options.ShowMCEncoding = true;
+	options.AsmVerbose = true;
+	llvm::legacy::PassManager pm;
+	if(EC || (*targetMachine)->addPassesToEmitFile(pm, dest, nullptr, llvm::CGFT_AssemblyFile))
+		return false;  // File could not be opened, or addPassesToEmitFile() returned true: file type can't be emitted
+	pm.run(module);
+	return true;
+}
+
+// Rewrites the asm file in place: writes a table of the resolved function addresses, followed by the
+// original lines with each instruction line prefixed by its address.
+// addresses[i] is used as the start address of the i-th "# -- Begin function" found in the file;
+// functions beyond the end of `addresses` are numbered from 0.
+void fixupAsmFile(const std::string &filename, std::vector<const void *> addresses)
+{
+	// Read input asm file into memory so we can overwrite it. This also allows us to merge multiline
+	// comments into a single line for easier parsing below.
+	std::vector<std::string> lines;
+	{
+		std::ifstream fin(filename);
+		std::string line;
+		while(std::getline(fin, line))
+		{
+			// A line whose first non-blank character is '#' continues the previous line's comment
+			const auto index = line.find_first_not_of(" \t");
+			const bool isCommentLine = (index != std::string::npos) && (line[index] == '#');
+
+			if(!lines.empty() && isCommentLine)
+			{
+				lines.back() += line;
+			}
+			else
+			{
+				lines.push_back(line);
+			}
+		}
+	}
+
+	std::ofstream fout(filename);
+
+	// Output function table
+	fout << "\nFunction Addresses:\n";
+	for(size_t i = 0; i < addresses.size(); i++)
+	{
+		fout << "f" << i << ": " << addresses[i] << "\n";
+	}
+	fout << "\n";
+
+	size_t functionIndex = ~size_t{ 0 };  // Wraps around to 0 at the first function
+	size_t instructionAddress = 0;
+
+	for(auto &line : lines)
+	{
+		if(line.find("# -- Begin function") != std::string::npos)
+		{
+			++functionIndex;
+
+			if(functionIndex < addresses.size())
+			{
+				instructionAddress = reinterpret_cast<size_t>(addresses[functionIndex]);
+			}
+			else
+			{
+				// For coroutines, more functions are compiled than the top-level three.
+				// For now, just output 0-based instructions.
+				instructionAddress = 0;
+			}
+		}
+
+		// Handle alignment directives by aligning the instruction address. When lowered, these actually
+		// map to nops that pad to the next aligned address.
+		if(line.find(".p2align") != std::string::npos)
+		{
+			// This assumes GNU asm format (https://sourceware.org/binutils/docs/as/P2align.html#P2align):
+			// the first operand is the power of two to align to. Capture only that operand, so that digits
+			// in later operands (e.g. the 0x90 fill value) or in trailing comments are not picked up.
+			static const std::regex reAlign(R"(\.p2align\s+([0-9]+))");
+			std::smatch matches;
+			if(std::regex_search(line, matches, reAlign))
+			{
+				const size_t align = size_t{ 1 } << std::stoul(matches[1].str());
+				instructionAddress = (instructionAddress + align - 1) & ~(align - 1);
+			}
+		}
+
+		// Detect instruction lines and prepend the location (address)
+		const auto pos = line.find("encoding: [");
+		if(pos != std::string::npos)
+		{
+			// Determine offset of next instruction (size of this instruction in bytes)
+			// e.g. # encoding: [0x48,0x89,0x4c,0x24,0x40]
+			// Count number of commas in the array + 1 (up to the end of the line if ']' is missing)
+			const auto endPos = std::min(line.find(']', pos), line.size());
+			const size_t instructionSize = 1 + std::count(line.begin() + pos, line.begin() + endPos, ',');
+
+			// Prepend current location to instruction line
+			std::stringstream location;
+			location << "[0x" << std::uppercase << std::hex << instructionAddress << "] ";
+			line = location.str() + line;
+
+			instructionAddress += instructionSize;
+		}
+
+		fout << line << "\n";
+	}
+}
+
+}  // namespace AsmFile
+}  // namespace rr
+
+#endif  // ENABLE_RR_EMIT_ASM_FILE
diff --git a/src/Reactor/LLVMAsm.hpp b/src/Reactor/LLVMAsm.hpp
new file mode 100644
index 0000000..b4dce34
--- /dev/null
+++ b/src/Reactor/LLVMAsm.hpp
@@ -0,0 +1,45 @@
+// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef rr_LLVMAsm_hpp
+#define rr_LLVMAsm_hpp
+
+#ifdef ENABLE_RR_EMIT_ASM_FILE
+
+#	include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#	include <string>
+#	include <vector>
+
+namespace rr {
+namespace AsmFile {
+
+// Returns a unique asm file name for routineName; each call embeds the next value of a running counter
+std::string generateFilename(std::string routineName);
+
+// Emits the asm for `module` to `filename`. Returns false if no target machine could be created from `builder`
+bool emitAsmFile(const std::string &filename, llvm::orc::JITTargetMachineBuilder builder, llvm::Module &module);
+
+// Rewrites the asm file previously written by emitAsmFile(), adding extra useful information:
+// a table of `addresses`, and the final resolved location (address) in front of each instruction.
+// NOTE: Doing this is error-prone since we parse text, and are thus dependent on the
+// exact format of LLVM's assembly output. It would be nice if LLVM's asm output included
+// at least the 0-based relative address of each instruction.
+void fixupAsmFile(const std::string &filename, std::vector<const void *> addresses);
+
+}  // namespace AsmFile
+}  // namespace rr
+
+#endif  // ENABLE_RR_EMIT_ASM_FILE
+
+#endif  // rr_LLVMAsm_hpp
diff --git a/src/Reactor/LLVMJIT.cpp b/src/Reactor/LLVMJIT.cpp
index dd2ac72..ff373b2 100644
--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp
@@ -16,6 +16,7 @@
 
 #include "Debug.hpp"
 #include "ExecutableMemory.hpp"
+#include "LLVMAsm.hpp"
 #include "Routine.hpp"
 
 // TODO(b/143539525): Eliminate when warning has been fixed.
@@ -687,6 +688,11 @@
 			names[i] = mangle(func->getName());
 		}
 
+#ifdef ENABLE_RR_EMIT_ASM_FILE
+		const auto asmFilename = rr::AsmFile::generateFilename(name);
+		rr::AsmFile::emitAsmFile(asmFilename, JITGlobals::get()->getTargetMachineBuilder(config.getOptimization().getLevel()), *module);
+#endif
+
 		// Once the module is passed to the compileLayer, the
 		// llvm::Functions are freed. Make sure funcs are not referenced
 		// after this point.
@@ -702,6 +708,10 @@
 			           (int)i, llvm::toString(symbol.takeError()).c_str());
 			addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(symbol->getAddress()));
 		}
+
+#ifdef ENABLE_RR_EMIT_ASM_FILE
+		rr::AsmFile::fixupAsmFile(asmFilename, addresses);
+#endif
 	}
 
 	~JITRoutine()