Subzero: fix external function calls on x86-32

On x86-32, the CALL instruction takes an IP-relative target address.
These are emitted as relocatable addresses, which we fix up in
rr::loadImage in SubzeroReactor.cpp. The problem is that we were calling
loadImage multiple times when compiling Coroutine functions - three
times, in fact, since there are three Coroutine functions (create,
await, destroy). As such, we would fix up the same relocatable addresses
multiple times. Because the fixup is computed from the existing value at
the relocation site, the address became invalid from the second fixup
onward. To fix this, we now call loadImage only once, collecting all
entry points in a single pass.

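Also opportunistically fixed the call to coro::isDone, which was missing
its handle argument, and added a static_assert to sz::Call that checks
the number of arguments passed against the callee's parameter count, so
that this kind of mistake is caught at compile time.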

Bug: angleproject:4482
Change-Id: If847d02649152abc1a9d8476023dd240465fa736
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/42448
Presubmit-Ready: Antonio Maiorano <amaiorano@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 9a83795..4607f9c 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -194,6 +194,8 @@
 template<typename Return, typename... CArgs, typename... RArgs>
 Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
 {
+	static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
+
 	Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
 	std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
 	return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
@@ -656,13 +658,23 @@
 	return symbolValue;
 }
 
-void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
+struct EntryPoint
 {
+	const void *entry;
+	size_t codeSize = 0;
+};
+
+std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
+{
+	ASSERT(functionNames.size() > 0);
+	std::vector<EntryPoint> entryPoints(functionNames.size());
+
 	ElfHeader *elfHeader = (ElfHeader *)elfImage;
 
+	// TODO: assert?
 	if(!elfHeader->checkMagic())
 	{
-		return nullptr;
+		return {};
 	}
 
 	// Expect ELF bitness to match platform
@@ -682,7 +694,6 @@
 #endif
 
 	SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
-	void *entry = nullptr;
 
 	for(int i = 0; i < elfHeader->e_shnum; i++)
 	{
@@ -690,17 +701,25 @@
 		{
 			if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
 			{
-				auto getCurrSectionName = [&]() {
+				auto findSectionNameEntryIndex = [&]() -> size_t {
 					auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
-					return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
-				};
-				if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
-				{
-					continue;
-				}
+					const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
 
-				entry = elfImage + sectionHeader[i].sh_offset;
-				codeSize = sectionHeader[i].sh_size;
+					for(size_t j = 0; j < functionNames.size(); ++j)
+					{
+						if(strstr(sectionName, functionNames[j]) != nullptr)
+						{
+							return j;
+						}
+					}
+
+					UNREACHABLE("Failed to find executable section that matches input function names");
+					return static_cast<size_t>(-1);
+				};
+
+				size_t index = findSectionNameEntryIndex();
+				entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
+				entryPoints[index].codeSize = sectionHeader[i].sh_size;
 			}
 		}
 		else if(sectionHeader[i].sh_type == SHT_REL)
@@ -725,7 +744,7 @@
 		}
 	}
 
-	return entry;
+	return entryPoints;
 }
 
 template<typename T>
@@ -796,18 +815,20 @@
 
 	void seek(uint64_t Off) override { position = Off; }
 
-	const void *getEntryByName(const char *name)
+	std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
 	{
-		size_t codeSize = 0;
-		const void *entry = loadImage(&buffer[0], codeSize, name);
+		auto entryPoints = loadImage(&buffer[0], functionNames);
 
 #if defined(_WIN32)
 		FlushInstructionCache(GetCurrentProcess(), NULL, 0);
 #else
-		__builtin___clear_cache((char *)entry, (char *)entry + codeSize);
+		for(auto &entryPoint : entryPoints)
+		{
+			__builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
+		}
 #endif
 
-		return entry;
+		return entryPoints;
 	}
 
 	void finalize()
@@ -1026,10 +1047,11 @@
 	objectWriter->writeNonUserSections();
 
 	// Done compiling functions, get entry pointers to each of them
-	for(size_t i = 0; i < Count; ++i)
+	auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
+	ASSERT(entryPoints.size() == Count);
+	for(size_t i = 0; i < entryPoints.size(); ++i)
 	{
-		const void *entry = ::routine->getEntryByName(names[i]);
-		::routine->setEntry(i, entry);
+		::routine->setEntry(i, entryPoints[i].entry);
 	}
 
 	::routine->finalize();
@@ -4719,7 +4741,7 @@
 		//         <resumeBlock>
 		//     }
 		Ice::CfgNode *bb = awaitFunc->getEntryNode();
-		Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
+		Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
 		auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
 		bb->appendInst(br);