Subzero: Apply commutativity to the RMW optimization.

The read-modify-write (RMW) optimization looks for patterns like this:

  a = Load addr
  b = <op> a, other
  Store b, addr

and essentially transforms them into this:

  RMW <op>, addr, other

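This CL also applies the transformation when the loaded value is the
second operand of the middle instruction, i.e.
  b = <op> other, a
provided <op> is commutative. In that case the two operands are swapped
before the RMW is built, so the resulting RMW is the same as above.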

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=jpp@chromium.org

Review URL: https://codereview.chromium.org/1193103005
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 6e80a8f..c1ba404 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -711,10 +711,13 @@
             if (!isSameMemAddressOperand(Load->getSourceAddress(),
                                          Store->getAddr()))
               continue;
-            if (false && Load->getSourceAddress() != Store->getAddr())
-              continue;
-            if (Arith->getSrc(0) != Load->getDest())
-              continue;
+            Operand *ArithSrcFromLoad = Arith->getSrc(0);
+            Operand *ArithSrcOther = Arith->getSrc(1);
+            if (ArithSrcFromLoad != Load->getDest()) {
+              if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
+                continue;
+              std::swap(ArithSrcFromLoad, ArithSrcOther);
+            }
             if (Arith->getDest() != Store->getData())
               continue;
             if (!canRMW(Arith))
@@ -734,8 +737,7 @@
             InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
             Node->getInsts().insert(I3, BeaconDef);
             InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
-                Func, Arith->getSrc(1), Store->getAddr(), Beacon,
-                Arith->getOp());
+                Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
             Node->getInsts().insert(I3, RMW);
           }
         }