Use ldr for movs out of stack slots (instead of mov reg, [sp/fp]).

So far, stack slots have always been accessed with the 32-bit ldr/str,
which load/store the whole slot independent of the variable type, so a
mov out of a stack slot is emitted as ldr (never ldrb/ldrh).

Enable -Om1 RUN lines in some tests that previously had no Om1
variant. Not every test is enabled yet, since there are still some
liveness problems caused by code that is unimplemented.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1144923008
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 2c33c43..713df3a 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -313,8 +313,18 @@
   assert(getSrcSize() == 1);
   Variable *Dest = getDest();
   if (Dest->hasReg()) {
-    Str << "\t"
-        << "mov" << getPredicate() << "\t";
+    const char *Opcode = "mov";
+    Operand *Src0 = getSrc(0);
+    if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
+      if (!Src0V->hasReg()) {
+        Opcode = "ldr"; // Always load the full stack slot (vs ldrb, ldrh).
+      }
+    } else {
+      // If Src isn't a variable, it shouldn't be a memory operand either
+      // (otherwise Opcode will have to be ldr).
+      assert(!llvm::isa<OperandARM32Mem>(Src0));
+    }
+    Str << "\t" << Opcode << getPredicate() << "\t";
     getDest()->emit(Func);
     Str << ", ";
     getSrc(0)->emit(Func);
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index e3a0e39..08f7dbe 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -15,6 +15,10 @@
 ; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
 
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+
 define void @fixed_416_align_16(i32 %n) {
 entry:
   %array = alloca i8, i32 416, align 16
diff --git a/tests_lit/llvm2ice_tests/branch-opt.ll b/tests_lit/llvm2ice_tests/branch-opt.ll
index eecd70f..fc0b2d8 100644
--- a/tests_lit/llvm2ice_tests/branch-opt.ll
+++ b/tests_lit/llvm2ice_tests/branch-opt.ll
@@ -12,11 +12,15 @@
 ; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
 ; once enough infrastructure is in. Also, switch to --filetype=obj
 ; when possible.
-; Also test Om1 when addProlog is done.
 ; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
 ; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32O2 %s
 
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck \
+; RUN:   --check-prefix ARM32OM1 %s
+
 declare void @dummy()
 
 ; An unconditional branch to the next block should be removed.
@@ -44,6 +48,11 @@
 ; ARM32O2: bl {{.*}} dummy
 ; ARM32O2-NEXT: bl {{.*}} dummy
 
+; ARM32OM1-LABEL: testUncondToNextBlock
+; ARM32OM1: bl {{.*}} dummy
+; ARM32OM1-NEXT: b
+; ARM32OM1-NEXT: bl {{.*}} dummy
+
 ; For a conditional branch with a fallthrough to the next block, the
 ; fallthrough branch should be removed.
 define void @testCondFallthroughToNextBlock(i32 %arg) {
@@ -88,6 +97,17 @@
 ; ARM32O2-NEXT: bl
 ; ARM32O2-NEXT: bx lr
 
+; ARM32OM1-LABEL: testCondFallthroughToNextBlock
+; ARM32OM1: cmp {{.*}}, #123
+; ARM32OM1-NEXT: movge {{.*}}, #1
+; ARM32OM1: cmp {{.*}}, #0
+; ARM32OM1: bne
+; ARM32OM1: b
+; ARM32OM1: bl
+; ARM32OM1: bx lr
+; ARM32OM1: bl
+; ARM32OM1: bx lr
+
 ; For a conditional branch with the next block as the target and a
 ; different block as the fallthrough, the branch condition should be
 ; inverted, the fallthrough block changed to the target, and the
@@ -134,3 +154,14 @@
 ; ARM32O2-NEXT: bx lr
 ; ARM32O2-NEXT: bl
 ; ARM32O2-NEXT: bx lr
+
+; ARM32OM1-LABEL: testCondTargetNextBlock
+; ARM32OM1: cmp {{.*}}, #123
+; ARM32OM1: movge {{.*}}, #1
+; ARM32OM1: cmp {{.*}}, #0
+; ARM32OM1: bne
+; ARM32OM1: b
+; ARM32OM1: bl
+; ARM32OM1: bx lr
+; ARM32OM1: bl
+; ARM32OM1: bx lr