Subzero: Apply commutativity to the RMW optimization.
The read-modify-write (RMW) optimization looks for patterns like this:
a = Load addr
b = <op> a, other
Store b, addr
and essentially transforms them into this:
RMW <op>, addr, other
This CL also applies the transformation when the middle instruction has its operands swapped:
b = <op> other, a
provided that <op> is commutative.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=jpp@chromium.org
Review URL: https://codereview.chromium.org/1193103005
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 6e80a8f..c1ba404 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -711,10 +711,13 @@
if (!isSameMemAddressOperand(Load->getSourceAddress(),
Store->getAddr()))
continue;
- if (false && Load->getSourceAddress() != Store->getAddr())
- continue;
- if (Arith->getSrc(0) != Load->getDest())
- continue;
+ Operand *ArithSrcFromLoad = Arith->getSrc(0);
+ Operand *ArithSrcOther = Arith->getSrc(1);
+ if (ArithSrcFromLoad != Load->getDest()) {
+ if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
+ continue;
+ std::swap(ArithSrcFromLoad, ArithSrcOther);
+ }
if (Arith->getDest() != Store->getData())
continue;
if (!canRMW(Arith))
@@ -734,8 +737,7 @@
InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
Node->getInsts().insert(I3, BeaconDef);
InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
- Func, Arith->getSrc(1), Store->getAddr(), Beacon,
- Arith->getOp());
+ Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
Node->getInsts().insert(I3, RMW);
}
}
diff --git a/tests_lit/llvm2ice_tests/rmw.ll b/tests_lit/llvm2ice_tests/rmw.ll
index 321f612..12d365d 100644
--- a/tests_lit/llvm2ice_tests/rmw.ll
+++ b/tests_lit/llvm2ice_tests/rmw.ll
@@ -102,3 +102,31 @@
; Look for something like: add DWORD PTR [eax+ecx*4+12],ecx
; CHECK-LABEL: rmw_add_i32_var_addropt
; CHECK: add DWORD PTR [e{{..}}+e{{..}}*4+0xc],e{{ax|bx|cx|dx|bp|di|si}}
+
+; Test for commutativity opportunities. This is the same as rmw_add_i32_var
+; except with the "add" operands reversed.
+define internal void @rmw_add_i32_var_comm(i32 %addr_arg, i32 %var) {
+entry:
+ %addr = inttoptr i32 %addr_arg to i32*
+ %val = load i32, i32* %addr, align 1
+ %rmw = add i32 %var, %val
+ store i32 %rmw, i32* %addr, align 1
+ ret void
+}
+; Look for something like: add DWORD PTR [eax],ecx
+; CHECK-LABEL: rmw_add_i32_var_comm
+; CHECK: add DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}],e{{ax|bx|cx|dx|bp|di|si}}
+
+; Test that commutativity isn't triggered for a non-commutative arithmetic
+; operator (sub). This is like rmw_add_i32_var_comm, except that it uses a
+; "sub" operation and also returns the result.
+define internal i32 @no_rmw_sub_i32_var(i32 %addr_arg, i32 %var) {
+entry:
+ %addr = inttoptr i32 %addr_arg to i32*
+ %val = load i32, i32* %addr, align 1
+ %rmw = sub i32 %var, %val
+ store i32 %rmw, i32* %addr, align 1
+ ret i32 %rmw
+}
+; CHECK-LABEL: no_rmw_sub_i32_var
+; CHECK: sub e{{ax|bx|cx|dx|bp|di|si}},DWORD PTR [e{{ax|bx|cx|dx|bp|di|si}}]