Eliminate stores succeeded by another store

When a store is propagated to all stores succeeding it in the same basic
block, and we encounter another store, we can delete the previous one.

This optimizes the StoresInMultipleBlocks test's generated code from:

 sub         rsp,38h
 mov         dword ptr [rsp],0Dh
 cmp         ecx,0
 je          a
 mov         dword ptr [rsp],4
 mov         eax,4
 add         eax,3
 mov         dword ptr [rsp],eax
b:
 mov         eax,dword ptr [rsp]
 add         rsp,38h
 ret
a:
 mov         dword ptr [rsp],6
 mov         eax,6
 add         eax,5
 mov         dword ptr [rsp],eax
 jmp         b

Into:

 sub         rsp,38h
 mov         dword ptr [rsp],0Dh
 cmp         ecx,0
 je          a
 mov         eax,4
 add         eax,3
 mov         dword ptr [rsp],eax
 jmp         b
a:
 mov         eax,6
 add         eax,5
 mov         dword ptr [rsp],eax
b:
 mov         eax,dword ptr [rsp]
 add         rsp,38h
 ret

Bug: b/179668593
Change-Id: I2036d7b7c97be17ce73ff32234fe51841409a20c
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/52568
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/Optimizer.cpp b/src/Reactor/Optimizer.cpp
index abb7d4c..01721e5 100644
--- a/src/Reactor/Optimizer.cpp
+++ b/src/Reactor/Optimizer.cpp
@@ -55,6 +55,7 @@
 	static bool isStore(const Ice::Inst &instruction);
 	static std::size_t storeSize(const Ice::Inst *instruction);
 	static bool loadTypeMatchesStore(const Ice::Inst *load, const Ice::Inst *store);
+	static bool storeTypeMatchesStore(const Ice::Inst *store1, const Ice::Inst *store2);
 
 	void collectDiagnostics();
 
@@ -633,7 +634,15 @@
 	for(Ice::CfgNode *block : function->getNodes())
 	{
 		// For each stack variable keep track of the last store instruction.
-		std::unordered_map<const Ice::InstAlloca *, const Ice::Inst *> lastStoreTo;
+		// To eliminate a store followed by another store to the same alloca address
+		// we must also know whether all loads have been replaced by the store value.
+		struct LastStore
+		{
+			Ice::Inst *store;
+			bool allLoadsReplaced = true;
+		};
+
+		std::unordered_map<const Ice::InstAlloca *, LastStore> lastStoreTo;
 
 		for(Ice::Inst &inst : block->getInsts())
 		{
@@ -652,7 +661,21 @@
 					// a pointer which could be used for indirect stores.
 					if(getUses(address)->areOnlyLoadStore())
 					{
-						lastStoreTo[alloca] = &inst;
+						// If there was a previous store to this address, and it was propagated
+						// to all subsequent loads, it can be eliminated.
+						if(auto entry = lastStoreTo.find(alloca); entry != lastStoreTo.end())
+						{
+							Ice::Inst *previousStore = entry->second.store;
+
+							// TODO(b/179668593): Also eliminate it if the next store is larger in size.
+							if(storeTypeMatchesStore(&inst, previousStore) &&
+							   entry->second.allLoadsReplaced)
+							{
+								deleteInstruction(previousStore);
+							}
+						}
+
+						lastStoreTo[alloca] = { &inst };
 					}
 				}
 			}
@@ -663,7 +686,7 @@
 					auto entry = lastStoreTo.find(alloca);
 					if(entry != lastStoreTo.end())
 					{
-						const Ice::Inst *store = entry->second;
+						const Ice::Inst *store = entry->second.store;
 
 						// Conservatively check that the types match.
 						// TODO(b/179668593): Also valid to propagate if the load is smaller or equal in size to the store.
@@ -671,6 +694,10 @@
 						{
 							replace(&inst, store->getData());
 						}
+						else
+						{
+							entry->second.allLoadsReplaced = false;
+						}
 					}
 				}
 			}
@@ -963,6 +990,29 @@
 	return true;
 }
 
+bool Optimizer::storeTypeMatchesStore(const Ice::Inst *store1, const Ice::Inst *store2)
+{
+	assert(isStore(*store1) && isStore(*store2));
+	assert(store1->getStoreAddress() == store2->getStoreAddress());
+
+	if(store1->getData()->getType() != store2->getData()->getType())
+	{
+		return false;
+	}
+
+	if(auto *storeSubVector1 = asStoreSubVector(store1))
+	{
+		if(auto *storeSubVector2 = asStoreSubVector(store2))
+		{
+			// Check for matching sub-vector width.
+			return llvm::cast<Ice::ConstantInteger32>(storeSubVector1->getSrc(2))->getValue() ==
+			       llvm::cast<Ice::ConstantInteger32>(storeSubVector2->getSrc(2))->getValue();
+		}
+	}
+
+	return true;
+}
+
 void Optimizer::collectDiagnostics()
 {
 	if(report)
diff --git a/tests/ReactorUnitTests/ReactorUnitTests.cpp b/tests/ReactorUnitTests/ReactorUnitTests.cpp
index ffd78fc..6da82dd 100644
--- a/tests/ReactorUnitTests/ReactorUnitTests.cpp
+++ b/tests/ReactorUnitTests/ReactorUnitTests.cpp
@@ -622,7 +622,7 @@
 	Nucleus::setOptimizerCallback([](const Nucleus::OptimizerReport *report) {
 		EXPECT_EQ(report->allocas, 1);
 		EXPECT_EQ(report->loads, 1);
-		EXPECT_EQ(report->stores, 5);
+		EXPECT_EQ(report->stores, 3);
 	});
 
 	auto routine = function(testName().c_str());