Propagate stores of stack addresses into other local variables
Pointers to the base address of arrays often get stored and then loaded
multiple time. This change adds an optimization pass to replace these
loads with the result of the alloca of the array, and eliminates the
allocas of the pointer variables.
The PropagateAlloca unit test without this optimization produces:
sub rsp,38h
mov dword ptr [rsp+8],16h
cmp ecx,0
je 000002543C4E407E
lea rax,[rsp+8]
mov qword ptr [rsp],rax
mov rax,qword ptr [rsp]
mov eax,dword ptr [rax]
add rsp,38h
ret
With the optimization pass it becomes:
sub rsp,38h
mov dword ptr [rsp],16h
cmp ecx,0
jne 0000015DC3B33074
mov eax,dword ptr [rsp]
add rsp,38h
ret
Also add a couple of unit tests for corner cases where the propagation
is not safe to perform.
Bug: b/179279298
Change-Id: I784899319bf5360f47c6fbc22fb82134d135dbfc
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/52468
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Reactor/Optimizer.cpp b/src/Reactor/Optimizer.cpp
index 4ac06de..a3901d7 100644
--- a/src/Reactor/Optimizer.cpp
+++ b/src/Reactor/Optimizer.cpp
@@ -28,7 +28,9 @@
private:
void analyzeUses(Ice::Cfg *function);
+
void eliminateDeadCode();
+ void propagateAlloca();
void eliminateUnitializedLoads();
void eliminateLoadsFollowingSingleStore();
void optimizeStoresInSingleBasicBlock();
@@ -95,7 +97,13 @@
analyzeUses(function);
+ // Start by eliminating any dead code, to avoid redundant work for the
+ // subsequent optimization passes.
eliminateDeadCode();
+
+ // Eliminate allocas which store the address of other allocas.
+ propagateAlloca();
+
eliminateUnitializedLoads();
eliminateLoadsFollowingSingleStore();
optimizeStoresInSingleBasicBlock();
@@ -109,6 +117,63 @@
operandsWithUses.clear();
}
+// Eliminates allocas which store the address of other allocas.
+void Optimizer::propagateAlloca()
+{
+ Ice::CfgNode *entryBlock = function->getEntryNode();
+ Ice::InstList &instList = entryBlock->getInsts();
+
+ for(Ice::Inst &inst : instList)
+ {
+ if(inst.isDeleted())
+ {
+ continue;
+ }
+
+ auto *alloca = llvm::dyn_cast<Ice::InstAlloca>(&inst);
+
+ if(!alloca)
+ {
+ break; // Allocas are all at the top
+ }
+
+ // Look for stores of this alloca's address.
+ Ice::Operand *address = alloca->getDest();
+ Uses uses = *getUses(address); // Hard copy
+
+ for(auto *store : uses)
+ {
+ if(isStore(*store) && store->getData() == address)
+ {
+ Ice::Operand *dest = store->getStoreAddress();
+ Ice::Variable *destVar = llvm::dyn_cast<Ice::Variable>(dest);
+ Ice::Inst *def = destVar ? getDefinition(destVar) : nullptr;
+
+ // If the address is stored into another stack variable, eliminate the latter.
+ if(def && def->getKind() == Ice::Inst::Alloca)
+ {
+ Uses destUses = *getUses(dest); // Hard copy
+
+ // Make sure the only store into the stack variable is this address, and that all of its other uses are loads.
+ // This prevents dynamic array loads/stores to be replaced by a scalar.
+ if((destUses.stores.size() == 1) && (destUses.loads.size() == destUses.size() - 1))
+ {
+ for(auto *load : destUses.loads)
+ {
+ replace(load, address);
+ }
+
+ // The address is now only stored, never loaded, so the store can be eliminated, together with its alloca.
+ assert(getUses(dest)->size() == 1);
+ deleteInstruction(store);
+ assert(def->isDeleted());
+ }
+ }
+ }
+ }
+ }
+}
+
void Optimizer::eliminateDeadCode()
{
bool modified;
diff --git a/tests/ReactorUnitTests/ReactorUnitTests.cpp b/tests/ReactorUnitTests/ReactorUnitTests.cpp
index b9eeb07..aa91dbe 100644
--- a/tests/ReactorUnitTests/ReactorUnitTests.cpp
+++ b/tests/ReactorUnitTests/ReactorUnitTests.cpp
@@ -376,6 +376,83 @@
EXPECT_EQ(result, 7);
}
+// This test excercises the Optimizer::propagateAlloca() optimization pass.
+// The pointer variable should not get stored to / loaded from memory.
+// TODO(b/180665600): Check that the optimization took place.
+TEST(ReactorUnitTests, PropagateAlloca)
+{
+ FunctionT<int(int)> function;
+ {
+ Int b = function.Arg<0>();
+
+ Int a = 22;
+ Pointer<Int> p;
+
+ // This branch materializes both `a` and `p`, and ensures single basic block
+ // optimizations don't also eliminate the pointer store and load.
+ If(b != 0) // TODO(b/179922668): Support If(b)
+ {
+ p = &a;
+ }
+
+ Return(Int(*p)); // TODO(b/179694472): Support Return(*p)
+ }
+
+ auto routine = function(testName().c_str());
+
+ int result = routine(true);
+ EXPECT_EQ(result, 22);
+}
+
+// Corner case for Optimizer::propagateAlloca(). It should not replace loading of `p`
+// with the addres of `a`, since it also got the address of `b` assigned.
+TEST(ReactorUnitTests, PointerToPointer)
+{
+ FunctionT<int()> function;
+ {
+ Int a = 444;
+ Int b = 555;
+ Int c = 666;
+
+ Pointer<Int> p = &a;
+ Pointer<Pointer<Int>> pp = &p;
+ p = &b;
+
+ Return(Int(*Pointer<Int>(*pp))); // TODO(b/179694472): Support **pp
+ }
+
+ auto routine = function(testName().c_str());
+
+ int result = routine();
+ EXPECT_EQ(result, 555);
+}
+
+// Corner case for Optimizer::propagateAlloca(). It should not replace loading of `p[i]`
+// with any of the addresses of the `a`, `b`, or `c`.
+TEST(ReactorUnitTests, ArrayOfPointersToLocals)
+{
+ FunctionT<int(int)> function;
+ {
+ Int i = function.Arg<0>();
+
+ Int a = 111;
+ Int b = 222;
+ Int c = 333;
+
+ Array<Pointer<Int>, 3> p;
+ p[0] = &a;
+ p[1] = &b;
+ p[2] = &c;
+
+ Return(Int(*Pointer<Int>(p[i]))); // TODO(b/179694472): Support *p[i]
+ }
+
+ auto routine = function(testName().c_str());
+
+ int result = routine(1);
+ EXPECT_EQ(result, 222);
+}
+
TEST(ReactorUnitTests, SubVectorLoadStore)
{
FunctionT<int(void *, void *)> function;