Subzero: Implemented codegen for poisoning and unpoisoning stack redzones

BUG=https://bugs.chromium.org/p/nativeclient/issues/detail?id=4374
R=kschimpf@google.com, stichnot@chromium.org

Review URL: https://codereview.chromium.org/2194853003 .
diff --git a/src/IceASanInstrumentation.cpp b/src/IceASanInstrumentation.cpp
index 1aa1730..6286299 100644
--- a/src/IceASanInstrumentation.cpp
+++ b/src/IceASanInstrumentation.cpp
@@ -31,12 +31,16 @@
 
 namespace {
 
-constexpr const char *ASanPrefix = "__asan";
+constexpr SizeT BytesPerWord = sizeof(uint32_t);
 constexpr SizeT RzSize = 32;
+constexpr SizeT ShadowScaleLog2 = 3;
+constexpr SizeT ShadowScale = 1 << ShadowScaleLog2;
+constexpr SizeT ShadowLength32 = 1 << (32 - ShadowScaleLog2);
+constexpr int32_t StackPoisonVal = -1;
+constexpr const char *ASanPrefix = "__asan";
 constexpr const char *RzPrefix = "__$rz";
 constexpr const char *RzArrayName = "__$rz_array";
 constexpr const char *RzSizesName = "__$rz_sizes";
-constexpr char RzStackPoison = -1;
 const llvm::NaClBitcodeRecord::RecordVector RzContents =
     llvm::NaClBitcodeRecord::RecordVector(RzSize, 'R');
 
@@ -64,7 +68,7 @@
 } // end of anonymous namespace
 
 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, LocalVars);
-ICE_TLS_DEFINE_FIELD(std::vector<InstCall *> *, ASanInstrumentation,
+ICE_TLS_DEFINE_FIELD(std::vector<InstStore *> *, ASanInstrumentation,
                      LocalDtors);
 
 bool ASanInstrumentation::isInstrumentable(Cfg *Func) {
@@ -162,25 +166,59 @@
 // redzone if it is found
 void ASanInstrumentation::instrumentFuncStart(LoweringContext &Context) {
   if (ICE_TLS_GET_FIELD(LocalDtors) == nullptr) {
-    ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstCall *>());
+    ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstStore *>());
     ICE_TLS_SET_FIELD(LocalVars, new VarSizeMap());
   }
   Cfg *Func = Context.getNode()->getCfg();
-  bool HasLocals = false;
-  LoweringContext C;
-  C.init(Context.getNode());
-  std::vector<Inst *> Initializations;
-  Constant *InitFunc =
-      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_poison"));
-  Constant *DestroyFunc =
-      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_unpoison"));
-
+  using Entry = std::pair<SizeT, int32_t>;
+  std::vector<InstAlloca *> NewAllocas;
+  std::vector<Entry> PoisonVals;
+  Variable *FirstShadowLocVar;
+  InstArithmetic *ShadowIndexCalc;
+  InstArithmetic *ShadowLocCalc;
   InstAlloca *Cur;
   ConstantInteger32 *VarSizeOp;
-  while (
-      (Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(C.getCur()))) &&
-      (VarSizeOp = llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes()))) {
-    HasLocals = true;
+  while (!Context.atEnd()) {
+    Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(Context.getCur()));
+    VarSizeOp = (Cur == nullptr)
+                    ? nullptr
+                    : llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes());
+    if (Cur == nullptr || VarSizeOp == nullptr) {
+      Context.advanceCur();
+      Context.advanceNext();
+      continue;
+    }
+
+    Cur->setDeleted();
+
+    if (PoisonVals.empty()) {
+      // insert leftmost redzone
+      auto *LastRzVar = Func->makeVariable(IceType_i32);
+      LastRzVar->setName(Func, nextRzName());
+      auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize);
+      constexpr SizeT Alignment = 8;
+      NewAllocas.emplace_back(
+          InstAlloca::create(Func, LastRzVar, ByteCount, Alignment));
+      PoisonVals.emplace_back(Entry{RzSize >> ShadowScaleLog2, StackPoisonVal});
+
+      // Calculate starting address for poisoning
+      FirstShadowLocVar = Func->makeVariable(IceType_i32);
+      FirstShadowLocVar->setName(Func, "firstShadowLoc");
+      auto *ShadowIndexVar = Func->makeVariable(IceType_i32);
+      ShadowIndexVar->setName(Func, "shadowIndex");
+
+      auto *ShadowScaleLog2Const =
+          ConstantInteger32::create(Ctx, IceType_i32, ShadowScaleLog2);
+      auto *ShadowMemLocConst =
+          ConstantInteger32::create(Ctx, IceType_i32, ShadowLength32);
+
+      ShadowIndexCalc =
+          InstArithmetic::create(Func, InstArithmetic::Lshr, ShadowIndexVar,
+                                 LastRzVar, ShadowScaleLog2Const);
+      ShadowLocCalc =
+          InstArithmetic::create(Func, InstArithmetic::Add, FirstShadowLocVar,
+                                 ShadowIndexVar, ShadowMemLocConst);
+    }
 
     // create the new alloca that includes a redzone
     SizeT VarSize = VarSizeOp->getValue();
@@ -190,72 +228,63 @@
     auto *ByteCount =
         ConstantInteger32::create(Ctx, IceType_i32, VarSize + RzPadding);
     constexpr SizeT Alignment = 8;
-    auto *NewVar = InstAlloca::create(Func, Dest, ByteCount, Alignment);
+    NewAllocas.emplace_back(
+        InstAlloca::create(Func, Dest, ByteCount, Alignment));
 
-    // calculate the redzone offset
-    Variable *RzLocVar = Func->makeVariable(IceType_i32);
-    RzLocVar->setName(Func, nextRzName());
-    auto *Offset = ConstantInteger32::create(Ctx, IceType_i32, VarSize);
-    auto *RzLoc = InstArithmetic::create(Func, InstArithmetic::Add, RzLocVar,
-                                         Dest, Offset);
-
-    // instructions to poison and unpoison the redzone
-    constexpr SizeT NumArgs = 2;
-    constexpr Variable *Void = nullptr;
-    constexpr bool NoTailcall = false;
-    auto *RzSizeConst = ConstantInteger32::create(Ctx, IceType_i32, RzPadding);
-    auto *RzPoisonConst =
-        ConstantInteger32::create(Ctx, IceType_i32, RzStackPoison);
-    auto *Init = InstCall::create(Func, NumArgs, Void, InitFunc, NoTailcall);
-    Init->addArg(RzLocVar);
-    Init->addArg(RzSizeConst);
-    Init->addArg(RzPoisonConst);
-    auto *Destroy =
-        InstCall::create(Func, NumArgs, Void, DestroyFunc, NoTailcall);
-    Destroy->addArg(RzLocVar);
-    Destroy->addArg(RzSizeConst);
-    Cur->setDeleted();
-    C.insert(NewVar);
-    ICE_TLS_GET_FIELD(LocalDtors)->emplace_back(Destroy);
-    Initializations.emplace_back(RzLoc);
-    Initializations.emplace_back(Init);
-
-    C.advanceCur();
-    C.advanceNext();
+    const SizeT Zeros = VarSize >> ShadowScaleLog2;
+    const SizeT Offset = VarSize % ShadowScale;
+    const SizeT PoisonBytes =
+        ((VarSize + RzPadding) >> ShadowScaleLog2) - Zeros - 1;
+    if (Zeros > 0)
+      PoisonVals.emplace_back(Entry{Zeros, 0});
+    PoisonVals.emplace_back(Entry{1, (Offset == 0) ? StackPoisonVal : Offset});
+    PoisonVals.emplace_back(Entry{PoisonBytes, StackPoisonVal});
+    Context.advanceCur();
+    Context.advanceNext();
   }
 
-  C.setInsertPoint(C.getCur());
-
-  // add the leftmost redzone
-  if (HasLocals) {
-    Variable *LastRz = Func->makeVariable(IceType_i32);
-    LastRz->setName(Func, nextRzName());
-    auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize);
-    constexpr SizeT Alignment = 8;
-    auto *RzAlloca = InstAlloca::create(Func, LastRz, ByteCount, Alignment);
-
-    constexpr SizeT NumArgs = 2;
-    constexpr Variable *Void = nullptr;
-    constexpr bool NoTailcall = false;
-    auto *RzPoisonConst =
-        ConstantInteger32::create(Ctx, IceType_i32, RzStackPoison);
-    auto *Init = InstCall::create(Func, NumArgs, Void, InitFunc, NoTailcall);
-    Init->addArg(LastRz);
-    Init->addArg(RzAlloca->getSizeInBytes());
-    Init->addArg(RzPoisonConst);
-    auto *Destroy =
-        InstCall::create(Func, NumArgs, Void, DestroyFunc, NoTailcall);
-    Destroy->addArg(LastRz);
-    Destroy->addArg(RzAlloca->getSizeInBytes());
-    ICE_TLS_GET_FIELD(LocalDtors)->emplace_back(Destroy);
-    C.insert(RzAlloca);
-    C.insert(Init);
+  Context.rewind();
+  if (PoisonVals.empty()) {
+    Context.advanceNext();
+    return;
   }
-
-  // insert initializers for the redzones
-  for (Inst *Init : Initializations) {
-    C.insert(Init);
+  for (InstAlloca *RzAlloca : NewAllocas) {
+    Context.insert(RzAlloca);
   }
+  Context.insert(ShadowIndexCalc);
+  Context.insert(ShadowLocCalc);
+
+  // Poison redzones
+  std::vector<Entry>::iterator Iter = PoisonVals.begin();
+  for (SizeT Offset = 0; Iter != PoisonVals.end(); Offset += BytesPerWord) {
+    int32_t CurVals[BytesPerWord] = {0};
+    for (uint32_t i = 0; i < BytesPerWord; ++i) {
+      if (Iter == PoisonVals.end())
+        break;
+      Entry Val = *Iter;
+      CurVals[i] = Val.second;
+      --Val.first;
+      if (Val.first > 0)
+        *Iter = Val;
+      else
+        ++Iter;
+    }
+    int32_t Poison = ((CurVals[3] & 0xff) << 24) | ((CurVals[2] & 0xff) << 16) |
+                     ((CurVals[1] & 0xff) << 8) | (CurVals[0] & 0xff);
+    if (Poison == 0)
+      continue;
+    auto *PoisonConst = ConstantInteger32::create(Ctx, IceType_i32, Poison);
+    auto *ZeroConst = ConstantInteger32::create(Ctx, IceType_i32, 0);
+    auto *OffsetConst = ConstantInteger32::create(Ctx, IceType_i32, Offset);
+    auto *PoisonAddrVar = Func->makeVariable(IceType_i32);
+    Context.insert(InstArithmetic::create(Func, InstArithmetic::Add,
+                                          PoisonAddrVar, FirstShadowLocVar,
+                                          OffsetConst));
+    Context.insert(InstStore::create(Func, PoisonConst, PoisonAddrVar));
+    ICE_TLS_GET_FIELD(LocalDtors)
+        ->emplace_back(InstStore::create(Func, ZeroConst, PoisonAddrVar));
+  }
+  Context.advanceNext();
 }
 
 void ASanInstrumentation::instrumentCall(LoweringContext &Context,
@@ -332,22 +361,13 @@
 
 void ASanInstrumentation::instrumentRet(LoweringContext &Context, InstRet *) {
   Cfg *Func = Context.getNode()->getCfg();
-  InstList::iterator Next = Context.getNext();
   Context.setInsertPoint(Context.getCur());
-  for (InstCall *RzUnpoison : *ICE_TLS_GET_FIELD(LocalDtors)) {
-    SizeT NumArgs = RzUnpoison->getNumArgs();
-    Variable *Dest = RzUnpoison->getDest();
-    Operand *CallTarget = RzUnpoison->getCallTarget();
-    bool HasTailCall = RzUnpoison->isTailcall();
-    bool IsTargetHelperCall = RzUnpoison->isTargetHelperCall();
-    auto *RzUnpoisonCpy = InstCall::create(Func, NumArgs, Dest, CallTarget,
-                                           HasTailCall, IsTargetHelperCall);
-    for (int I = 0, Args = RzUnpoison->getNumArgs(); I < Args; ++I) {
-      RzUnpoisonCpy->addArg(RzUnpoison->getArg(I));
-    }
-    Context.insert(RzUnpoisonCpy);
+  for (InstStore *RzUnpoison : *ICE_TLS_GET_FIELD(LocalDtors)) {
+    Context.insert(
+        InstStore::create(Func, RzUnpoison->getData(), RzUnpoison->getAddr()));
   }
-  Context.setNext(Next);
+  Context.advanceCur();
+  Context.advanceNext();
 }
 
 void ASanInstrumentation::instrumentStart(Cfg *Func) {
diff --git a/src/IceASanInstrumentation.h b/src/IceASanInstrumentation.h
index f3090ef..3959b36 100644
--- a/src/IceASanInstrumentation.h
+++ b/src/IceASanInstrumentation.h
@@ -54,7 +54,7 @@
   void instrumentStart(Cfg *Func) override;
   void finishFunc(Cfg *Func) override;
   ICE_TLS_DECLARE_FIELD(VarSizeMap *, LocalVars);
-  ICE_TLS_DECLARE_FIELD(std::vector<InstCall *> *, LocalDtors);
+  ICE_TLS_DECLARE_FIELD(std::vector<InstStore *> *, LocalDtors);
   GlobalSizeMap GlobalSizes;
   std::atomic<uint32_t> RzNum;
   bool DidProcessGlobals = false;
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index d55f79b..eace318 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -897,7 +897,7 @@
     uint32_t Align1 = A1->getAlignInBytes();
     uint32_t Align2 = A2->getAlignInBytes();
     if (Align1 == Align2)
-      return A1->getNumber() > A2->getNumber();
+      return A1->getNumber() < A2->getNumber();
     else
       return Align1 > Align2;
   });
diff --git a/tests_lit/asan_tests/alignment.ll b/tests_lit/asan_tests/alignment.ll
index b288df4..908f2d0 100644
--- a/tests_lit/asan_tests/alignment.ll
+++ b/tests_lit/asan_tests/alignment.ll
@@ -13,34 +13,19 @@
 }
 
 ; CHECK: func
-; CHECK-NEXT: sub    esp,0xbc
-; CHECK-NEXT: lea    eax,[esp+0x10]
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
-; CHECK-NEXT: mov    DWORD PTR [esp+0x4],0x20
-; CHECK-NEXT: mov    DWORD PTR [esp+0x8],0xffffffff
-; CHECK-NEXT: __asan_poison
-; CHECK-NEXT: lea    eax,[esp+0x74]
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
-; CHECK-NEXT: mov    DWORD PTR [esp+0x4],0x3c
-; CHECK-NEXT: mov    DWORD PTR [esp+0x8],0xffffffff
-; CHECK-NEXT: __asan_poison
-; CHECK-NEXT: lea    eax,[esp+0x35]
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
-; CHECK-NEXT: mov    DWORD PTR [esp+0x4],0x3b
-; CHECK-NEXT: mov    DWORD PTR [esp+0x8],0xffffffff
-; CHECK-NEXT: __asan_poison
-; CHECK-NEXT: lea    eax,[esp+0x74]
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
-; CHECK-NEXT: mov    DWORD PTR [esp+0x4],0x3c
-; CHECK-NEXT: __asan_unpoison
-; CHECK-NEXT: lea    eax,[esp+0x35]
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
-; CHECK-NEXT: mov    DWORD PTR [esp+0x4],0x3b
-; CHECK-NEXT: __asan_unpoison
-; CHECK-NEXT: lea    eax,[esp+0x10]
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
-; CHECK-NEXT: mov    DWORD PTR [esp+0x4],0x20
-; CHECK-NEXT: __asan_unpoison
+; CHECK-NEXT: sub    esp,0xa0
+; CHECK-NEXT: lea    eax,[esp]
+; CHECK-NEXT: shr    eax,0x3
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000000],0xffffffff
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000004],0xffffff04
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000008],0xffffffff
+; CHECK-NEXT: mov    DWORD PTR [eax+0x2000000c],0xffffff05
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000010],0xffffffff
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000000],0x0
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000004],0x0
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000008],0x0
+; CHECK-NEXT: mov    DWORD PTR [eax+0x2000000c],0x0
+; CHECK-NEXT: mov    DWORD PTR [eax+0x20000010],0x0
 ; CHECK-NEXT: mov    eax,0x2a
-; CHECK-NEXT: add    esp,0xbc
+; CHECK-NEXT: add    esp,0xa0
 ; CHECK-NEXT: ret
diff --git a/tests_lit/asan_tests/blacklist.ll b/tests_lit/asan_tests/blacklist.ll
index c988a2c..1281721 100644
--- a/tests_lit/asan_tests/blacklist.ll
+++ b/tests_lit/asan_tests/blacklist.ll
@@ -36,14 +36,20 @@
 ; DUMP-LABEL: ================ Instrumented CFG ================
 ; DUMP-NEXT: define internal void @func() {
 ; DUMP-NEXT: __0:
+; DUMP-NEXT:   %__$rz0 = alloca i8, i32 32, align 8
 ; DUMP-NEXT:   %local = alloca i8, i32 64, align 8
-; DUMP-NEXT:   %__$rz1 = alloca i8, i32 32, align 8
-; DUMP-NEXT:   call void @__asan_poison(i32 %__$rz1, i32 32, i32 -1)
-; DUMP-NEXT:   %__$rz0 = add i32 %local, 4
-; DUMP-NEXT:   call void @__asan_poison(i32 %__$rz0, i32 60, i32 -1)
+; DUMP-NEXT:   %shadowIndex = lshr i32 %__$rz0, 3
+; DUMP-NEXT:   %firstShadowLoc = add i32 %shadowIndex, 536870912
+; DUMP-NEXT:   %__5 = add i32 %firstShadowLoc, 0
+; DUMP-NEXT:   store i32 -1, i32* %__5, align 1
+; DUMP-NEXT:   %__6 = add i32 %firstShadowLoc, 4
+; DUMP-NEXT:   store i32 -252, i32* %__6, align 1
+; DUMP-NEXT:   %__7 = add i32 %firstShadowLoc, 8
+; DUMP-NEXT:   store i32 -1, i32* %__7, align 1
 ; DUMP-NEXT:   %heapvar = call i32 @__asan_malloc(i32 42)
 ; DUMP-NEXT:   call void @__asan_free(i32 %heapvar)
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz0, i32 60)
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz1, i32 32)
+; DUMP-NEXT:   store i32 0, i32* %__5, align 1
+; DUMP-NEXT:   store i32 0, i32* %__6, align 1
+; DUMP-NEXT:   store i32 0, i32* %__7, align 1
 ; DUMP-NEXT:   ret void
 ; DUMP-NEXT: }
diff --git a/tests_lit/asan_tests/errors.ll b/tests_lit/asan_tests/errors.ll
index 3e7f2da..cc403cc 100644
--- a/tests_lit/asan_tests/errors.ll
+++ b/tests_lit/asan_tests/errors.ll
@@ -6,70 +6,115 @@
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 2>&1 | FileCheck --check-prefix=LOCAL-LOAD %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 2>&1 | FileCheck --check-prefix=LOCAL-LOAD %s
 
 ; check with a many off the end local load
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2>&1 | FileCheck --check-prefix=LOCAL-LOAD %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2>&1 | FileCheck --check-prefix=LOCAL-LOAD %s
 
 ; check with a one before the front local load
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 2>&1 | FileCheck --check-prefix=LOCAL-LOAD %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 2>&1 | FileCheck --check-prefix=LOCAL-LOAD %s
 
 ; check with a one off the end global load
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 2>&1 | FileCheck \
 ; RUN:     --check-prefix=GLOBAL-LOAD %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 2>&1 | FileCheck \
+; RUN:     --check-prefix=GLOBAL-LOAD %s
 
 ; check with a many off the end global load
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 2>&1 | FileCheck \
 ; RUN:    --check-prefix=GLOBAL-LOAD %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 2>&1 | FileCheck \
+; RUN:     --check-prefix=GLOBAL-LOAD %s
 
 ; check with a one before the front global load
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 2>&1 | FileCheck \
 ; RUN:     --check-prefix=GLOBAL-LOAD %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 2>&1 | FileCheck \
+; RUN:     --check-prefix=GLOBAL-LOAD %s
 
 ; check with a one off the end local store
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 2>&1 | FileCheck \
 ; RUN:     --check-prefix=LOCAL-STORE %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 2>&1 | FileCheck \
+; RUN:     --check-prefix=LOCAL-STORE %s
 
 ; check with a many off the end local store
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 2>&1 | FileCheck \
 ; RUN:     --check-prefix=LOCAL-STORE %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 2>&1 | FileCheck \
+; RUN:     --check-prefix=LOCAL-STORE %s
 
 ; check with a one before the front local store
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 2>&1 | FileCheck \
 ; RUN:     --check-prefix=LOCAL-STORE %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 2>&1 | FileCheck \
+; RUN:     --check-prefix=LOCAL-STORE %s
 
 ; check with a one off the end global store
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 9 2>&1 | FileCheck \
 ; RUN:     --check-prefix=GLOBAL-STORE %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 9 2>&1 | FileCheck \
+; RUN:     --check-prefix=GLOBAL-STORE %s
 
 ; check with a many off the end global store
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 9 10 2>&1 | FileCheck \
 ; RUN:    --check-prefix=GLOBAL-STORE %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 9 10 2>&1 | FileCheck \
+; RUN:    --check-prefix=GLOBAL-STORE %s
 
 ; check with a one before the front global store
 ; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
 ; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols \
 ; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 9 10 11 2>&1 | FileCheck \
 ; RUN:     --check-prefix=GLOBAL-STORE %s
+; RUN: llvm-as %s -o - | pnacl-freeze > %t.pexe && %S/../../pydir/szbuild.py \
+; RUN:     --fsanitize-address --sz=-allow-externally-defined-symbols -O2 \
+; RUN:     %t.pexe -o %t && %t 1 2 3 4 5 6 7 8 9 10 11 2>&1 | FileCheck \
+; RUN:     --check-prefix=GLOBAL-STORE %s
 
 declare external void @exit(i32)
 
diff --git a/tests_lit/asan_tests/instrumentlocals.ll b/tests_lit/asan_tests/instrumentlocals.ll
index 2cdbe17..38688fe 100644
--- a/tests_lit/asan_tests/instrumentlocals.ll
+++ b/tests_lit/asan_tests/instrumentlocals.ll
@@ -3,7 +3,7 @@
 ; REQUIRES: allow_dump
 
 ; RUN: %p2i -i %s --args -verbose=inst -threads=0 -fsanitize-address \
-; RUN:     | FileCheck --check-prefix=DUMP %s
+; RUN:     -allow-externally-defined-symbols | FileCheck --check-prefix=DUMP %s
 
 ; Function with local variables to be instrumented
 define internal void @func() {
@@ -12,34 +12,63 @@
   %local3 = alloca i8, i32 13, align 2
   %local4 = alloca i8, i32 75, align 4
   %local5 = alloca i8, i32 64, align 8
+  %i1 = ptrtoint i8* %local1 to i32
+  %i2 = ptrtoint i8* %local2 to i32
+  %i3 = ptrtoint i8* %local3 to i32
+  %i4 = ptrtoint i8* %local4 to i32
+  %i5 = ptrtoint i8* %local5 to i32
+  call void @foo(i32 %i1)
+  call void @foo(i32 %i2)
+  call void @foo(i32 %i3)
+  call void @foo(i32 %i4)
+  call void @foo(i32 %i5)
   ret void
 }
 
+declare external void @foo(i32)
+
 ; DUMP-LABEL: ================ Instrumented CFG ================
 ; DUMP-NEXT: define internal void @func() {
 ; DUMP-NEXT: __0:
-; DUMP-NEXT: %local1 = alloca i8, i32 64, align 8
-; DUMP-NEXT: %local2 = alloca i8, i32 64, align 8
-; DUMP-NEXT: %local3 = alloca i8, i32 64, align 8
-; DUMP-NEXT: %local4 = alloca i8, i32 128, align 8
-; DUMP-NEXT: %local5 = alloca i8, i32 96, align 8
-; DUMP-NEXT: %__$rz[[RZ0:[0-9]+]] = alloca i8, i32 32, align 8
-; DUMP-NEXT: call void @__asan_poison(i32 %__$rz[[RZ0]], i32 32, i32 -1)
-; DUMP-NEXT: %__$rz[[RZ1:[0-9]+]] = add i32 %local1, 4
-; DUMP-NEXT: call void @__asan_poison(i32 %__$rz[[RZ1]], i32 60, i32 -1)
-; DUMP-NEXT: %__$rz[[RZ2:[0-9]+]] = add i32 %local2, 32
-; DUMP-NEXT: call void @__asan_poison(i32 %__$rz[[RZ2]], i32 32, i32 -1)
-; DUMP-NEXT: %__$rz[[RZ3:[0-9]+]] = add i32 %local3, 13
-; DUMP-NEXT: call void @__asan_poison(i32 %__$rz[[RZ3]], i32 51, i32 -1)
-; DUMP-NEXT: %__$rz[[RZ4:[0-9]+]] = add i32 %local4, 75
-; DUMP-NEXT: call void @__asan_poison(i32 %__$rz[[RZ4]], i32 53, i32 -1)
-; DUMP-NEXT: %__$rz[[RZ5:[0-9]+]] = add i32 %local5, 64
-; DUMP-NEXT: call void @__asan_poison(i32 %__$rz[[RZ5]], i32 32, i32 -1)
-; DUMP-NEXT: call void @__asan_unpoison(i32 %__$rz[[RZ1]], i32 60)
-; DUMP-NEXT: call void @__asan_unpoison(i32 %__$rz[[RZ2]], i32 32)
-; DUMP-NEXT: call void @__asan_unpoison(i32 %__$rz[[RZ3]], i32 51)
-; DUMP-NEXT: call void @__asan_unpoison(i32 %__$rz[[RZ4]], i32 53)
-; DUMP-NEXT: call void @__asan_unpoison(i32 %__$rz[[RZ5]], i32 32)
-; DUMP-NEXT: call void @__asan_unpoison(i32 %__$rz[[RZ0]], i32 32)
-; DUMP-NEXT: ret void
+; DUMP-NEXT:   %__$rz0 = alloca i8, i32 32, align 8
+; DUMP-NEXT:   %local1 = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %local2 = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %local3 = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %local4 = alloca i8, i32 128, align 8
+; DUMP-NEXT:   %local5 = alloca i8, i32 96, align 8
+; DUMP-NEXT:   %shadowIndex = lshr i32 %__$rz0, 3
+; DUMP-NEXT:   %firstShadowLoc = add i32 %shadowIndex, 536870912
+; DUMP-NEXT:   %__8 = add i32 %firstShadowLoc, 0
+; DUMP-NEXT:   store i32 -1, i32* %__8, align 1
+; DUMP-NEXT:   %__9 = add i32 %firstShadowLoc, 4
+; DUMP-NEXT:   store i32 -252, i32* %__9, align 1
+; DUMP-NEXT:   %__10 = add i32 %firstShadowLoc, 8
+; DUMP-NEXT:   store i32 -1, i32* %__10, align 1
+; DUMP-NEXT:   %__11 = add i32 %firstShadowLoc, 16
+; DUMP-NEXT:   store i32 -1, i32* %__11, align 1
+; DUMP-NEXT:   %__12 = add i32 %firstShadowLoc, 20
+; DUMP-NEXT:   store i32 -64256, i32* %__12, align 1
+; DUMP-NEXT:   %__13 = add i32 %firstShadowLoc, 24
+; DUMP-NEXT:   store i32 -1, i32* %__13, align 1
+; DUMP-NEXT:   %__14 = add i32 %firstShadowLoc, 36
+; DUMP-NEXT:   store i32 -64768, i32* %__14, align 1
+; DUMP-NEXT:   %__15 = add i32 %firstShadowLoc, 40
+; DUMP-NEXT:   store i32 -1, i32* %__15, align 1
+; DUMP-NEXT:   %__16 = add i32 %firstShadowLoc, 52
+; DUMP-NEXT:   store i32 -1, i32* %__16, align 1
+; DUMP-NEXT:   call void @foo(i32 %local1)
+; DUMP-NEXT:   call void @foo(i32 %local2)
+; DUMP-NEXT:   call void @foo(i32 %local3)
+; DUMP-NEXT:   call void @foo(i32 %local4)
+; DUMP-NEXT:   call void @foo(i32 %local5)
+; DUMP-NEXT:   store i32 0, i32* %__8, align 1
+; DUMP-NEXT:   store i32 0, i32* %__9, align 1
+; DUMP-NEXT:   store i32 0, i32* %__10, align 1
+; DUMP-NEXT:   store i32 0, i32* %__11, align 1
+; DUMP-NEXT:   store i32 0, i32* %__12, align 1
+; DUMP-NEXT:   store i32 0, i32* %__13, align 1
+; DUMP-NEXT:   store i32 0, i32* %__14, align 1
+; DUMP-NEXT:   store i32 0, i32* %__15, align 1
+; DUMP-NEXT:   store i32 0, i32* %__16, align 1
+; DUMP-NEXT:   ret void
 ; DUMP-NEXT: }
diff --git a/tests_lit/asan_tests/multiple_returns.ll b/tests_lit/asan_tests/multiple_returns.ll
index a49e26f..ec5ed6a 100644
--- a/tests_lit/asan_tests/multiple_returns.ll
+++ b/tests_lit/asan_tests/multiple_returns.ll
@@ -16,27 +16,38 @@
   ret void
 }
 
-; DUMP-LABEL: ================ Instrumented CFG ================
+; DUMP-LABEL:================ Instrumented CFG ================
 ; DUMP-NEXT: define internal void @ret_twice(i32 %condarg) {
 ; DUMP-NEXT: __0:
+; DUMP-NEXT:   %__$rz0 = alloca i8, i32 32, align 8
 ; DUMP-NEXT:   %local1 = alloca i8, i32 64, align 8
 ; DUMP-NEXT:   %local2 = alloca i8, i32 64, align 8
-; DUMP-NEXT:   %__$rz2 = alloca i8, i32 32, align 8
-; DUMP-NEXT:   call void @__asan_poison(i32 %__$rz2, i32 32, i32 -1)
-; DUMP-NEXT:   %__$rz0 = add i32 %local1, 4
-; DUMP-NEXT:   call void @__asan_poison(i32 %__$rz0, i32 60, i32 -1)
-; DUMP-NEXT:   %__$rz1 = add i32 %local2, 4
-; DUMP-NEXT:   call void @__asan_poison(i32 %__$rz1, i32 60, i32 -1)
+; DUMP-NEXT:   %shadowIndex = lshr i32 %__$rz0, 3
+; DUMP-NEXT:   %firstShadowLoc = add i32 %shadowIndex, 536870912
+; DUMP-NEXT:   %__7 = add i32 %firstShadowLoc, 0
+; DUMP-NEXT:   store i32 -1, i32* %__7, align 1
+; DUMP-NEXT:   %__8 = add i32 %firstShadowLoc, 4
+; DUMP-NEXT:   store i32 -252, i32* %__8, align 1
+; DUMP-NEXT:   %__9 = add i32 %firstShadowLoc, 8
+; DUMP-NEXT:   store i32 -1, i32* %__9, align 1
+; DUMP-NEXT:   %__10 = add i32 %firstShadowLoc, 12
+; DUMP-NEXT:   store i32 -252, i32* %__10, align 1
+; DUMP-NEXT:   %__11 = add i32 %firstShadowLoc, 16
+; DUMP-NEXT:   store i32 -1, i32* %__11, align 1
 ; DUMP-NEXT:   %cond = icmp ne i32 %condarg, 0
 ; DUMP-NEXT:   br i1 %cond, label %yes, label %no
 ; DUMP-NEXT: yes:
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz0, i32 60)
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz1, i32 60)
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz2, i32 32)
+; DUMP-NEXT:   store i32 0, i32* %__7, align 1
+; DUMP-NEXT:   store i32 0, i32* %__8, align 1
+; DUMP-NEXT:   store i32 0, i32* %__9, align 1
+; DUMP-NEXT:   store i32 0, i32* %__10, align 1
+; DUMP-NEXT:   store i32 0, i32* %__11, align 1
 ; DUMP-NEXT:   ret void
 ; DUMP-NEXT: no:
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz0, i32 60)
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz1, i32 60)
-; DUMP-NEXT:   call void @__asan_unpoison(i32 %__$rz2, i32 32)
+; DUMP-NEXT:   store i32 0, i32* %__7, align 1
+; DUMP-NEXT:   store i32 0, i32* %__8, align 1
+; DUMP-NEXT:   store i32 0, i32* %__9, align 1
+; DUMP-NEXT:   store i32 0, i32* %__10, align 1
+; DUMP-NEXT:   store i32 0, i32* %__11, align 1
 ; DUMP-NEXT:   ret void
-; DUMP-NEXT: }
\ No newline at end of file
+; DUMP-NEXT: }
diff --git a/tests_lit/asan_tests/scatteredallocas.ll b/tests_lit/asan_tests/scatteredallocas.ll
new file mode 100644
index 0000000..5c4588c
--- /dev/null
+++ b/tests_lit/asan_tests/scatteredallocas.ll
@@ -0,0 +1,63 @@
+; Test that static allocas throughout the entry block are instrumented correctly
+
+; REQUIRES: allow_dump
+
+; RUN: %p2i -i %s --args -verbose=inst -threads=0 -fsanitize-address \
+; RUN:     -allow-externally-defined-symbols | FileCheck --check-prefix=DUMP %s
+
+declare external i32 @malloc(i32)
+declare external void @free(i32)
+
+define void @func() {
+  %a = alloca i8, i32 4, align 4
+  %m1 = call i32 @malloc(i32 42)
+  %b = alloca i8, i32 16, align 4
+  store i8 50, i8* %a, align 1
+  %c = alloca i8, i32 8, align 8
+  call void @free(i32 %m1)
+  %d = alloca i8, i32 12, align 4
+  ret void
+}
+
+; DUMP-LABEL: ================ Instrumented CFG ================
+; DUMP-NEXT: define void @func() {
+; DUMP-NEXT: __0:
+; DUMP-NEXT:   %__$rz0 = alloca i8, i32 32, align 8
+; DUMP-NEXT:   %a = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %b = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %c = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %d = alloca i8, i32 64, align 8
+; DUMP-NEXT:   %shadowIndex = lshr i32 %__$rz0, 3
+; DUMP-NEXT:   %firstShadowLoc = add i32 %shadowIndex, 536870912
+; DUMP-NEXT:   %__8 = add i32 %firstShadowLoc, 0
+; DUMP-NEXT:   store i32 -1, i32* %__8, align 1
+; DUMP-NEXT:   %__9 = add i32 %firstShadowLoc, 4
+; DUMP-NEXT:   store i32 -252, i32* %__9, align 1
+; DUMP-NEXT:   %__10 = add i32 %firstShadowLoc, 8
+; DUMP-NEXT:   store i32 -1, i32* %__10, align 1
+; DUMP-NEXT:   %__11 = add i32 %firstShadowLoc, 12
+; DUMP-NEXT:   store i32 -65536, i32* %__11, align 1
+; DUMP-NEXT:   %__12 = add i32 %firstShadowLoc, 16
+; DUMP-NEXT:   store i32 -1, i32* %__12, align 1
+; DUMP-NEXT:   %__13 = add i32 %firstShadowLoc, 20
+; DUMP-NEXT:   store i32 -256, i32* %__13, align 1
+; DUMP-NEXT:   %__14 = add i32 %firstShadowLoc, 24
+; DUMP-NEXT:   store i32 -1, i32* %__14, align 1
+; DUMP-NEXT:   %__15 = add i32 %firstShadowLoc, 28
+; DUMP-NEXT:   store i32 -64512, i32* %__15, align 1
+; DUMP-NEXT:   %__16 = add i32 %firstShadowLoc, 32
+; DUMP-NEXT:   store i32 -1, i32* %__16, align 1
+; DUMP-NEXT:   %m1 = call i32 @__asan_malloc(i32 42)
+; DUMP-NEXT:   store i8 50, i8* %a, align 1
+; DUMP-NEXT:   call void @__asan_free(i32 %m1)
+; DUMP-NEXT:   store i32 0, i32* %__8, align 1
+; DUMP-NEXT:   store i32 0, i32* %__9, align 1
+; DUMP-NEXT:   store i32 0, i32* %__10, align 1
+; DUMP-NEXT:   store i32 0, i32* %__11, align 1
+; DUMP-NEXT:   store i32 0, i32* %__12, align 1
+; DUMP-NEXT:   store i32 0, i32* %__13, align 1
+; DUMP-NEXT:   store i32 0, i32* %__14, align 1
+; DUMP-NEXT:   store i32 0, i32* %__15, align 1
+; DUMP-NEXT:   store i32 0, i32* %__16, align 1
+; DUMP-NEXT:   ret void
+; DUMP-NEXT: }
\ No newline at end of file
diff --git a/tests_lit/llvm2ice_tests/fused-alloca-arg.ll b/tests_lit/llvm2ice_tests/fused-alloca-arg.ll
index d76755e..1b009ba 100644
--- a/tests_lit/llvm2ice_tests/fused-alloca-arg.ll
+++ b/tests_lit/llvm2ice_tests/fused-alloca-arg.ll
@@ -49,16 +49,16 @@
 ; CHECK-LABEL:  caller2
 ; CHECK-NEXT:   sub    esp,0x6c
 ; CHECK-NEXT:   mov    eax,DWORD PTR [esp+0x70]
-; CHECK-NEXT:   mov    DWORD PTR [esp+0x40],eax
 ; CHECK-NEXT:   mov    DWORD PTR [esp+0x20],eax
+; CHECK-NEXT:   mov    DWORD PTR [esp+0x40],eax
 ; CHECK-NEXT:   mov    DWORD PTR [esp],eax
-; CHECK-NEXT:   lea    eax,[esp+0x40]
+; CHECK-NEXT:   lea    eax,[esp+0x20]
 ; CHECK-NEXT:   mov    DWORD PTR [esp+0x4],eax
-; CHECK-NEXT:   lea    eax,[esp+0x20]
-; CHECK-NEXT:   mov    DWORD PTR [esp+0x8],eax
 ; CHECK-NEXT:   lea    eax,[esp+0x40]
-; CHECK-NEXT:   mov    DWORD PTR [esp+0xc],eax
+; CHECK-NEXT:   mov    DWORD PTR [esp+0x8],eax
 ; CHECK-NEXT:   lea    eax,[esp+0x20]
+; CHECK-NEXT:   mov    DWORD PTR [esp+0xc],eax
+; CHECK-NEXT:   lea    eax,[esp+0x40]
 ; CHECK-NEXT:   mov    DWORD PTR [esp+0x10],eax
 ; CHECK-NEXT:   call
 ; CHECK-NEXT:   add    esp,0x6c
diff --git a/tests_lit/llvm2ice_tests/fused-alloca.ll b/tests_lit/llvm2ice_tests/fused-alloca.ll
index b7e683f..1b46305 100644
--- a/tests_lit/llvm2ice_tests/fused-alloca.ll
+++ b/tests_lit/llvm2ice_tests/fused-alloca.ll
@@ -21,8 +21,8 @@
 ; CHECK-LABEL: fused_small_align
 ; CHECK-NEXT: sub    esp,0x30
 ; CHECK-NEXT: mov    eax,DWORD PTR [esp+0x34]
-; CHECK-NEXT: mov    DWORD PTR [esp+0x1c],eax
 ; CHECK-NEXT: mov    DWORD PTR [esp+0x10],eax
+; CHECK-NEXT: mov    DWORD PTR [esp+0x18],eax
 ; CHECK-NEXT: mov    DWORD PTR [esp],eax
 ; CHECK-NEXT: add    esp,0x30
 
@@ -46,9 +46,9 @@
 ; CHECK-NEXT: sub    esp,0x80
 ; CHECK-NEXT: and    esp,0xffffffc0
 ; CHECK-NEXT: mov    eax,DWORD PTR [ebp+0x8]
-; CHECK-NEXT: mov    DWORD PTR [esp+0x60],eax
-; CHECK-NEXT: mov    DWORD PTR [esp],eax
 ; CHECK-NEXT: mov    DWORD PTR [esp+0x40],eax
+; CHECK-NEXT: mov    DWORD PTR [esp],eax
+; CHECK-NEXT: mov    DWORD PTR [esp+0x60],eax
 ; CHECK-NEXT: mov    esp,ebp
 ; CHECK-NEXT: pop    ebp
 
@@ -154,10 +154,10 @@
 ; CHECK-NEXT: add    edx,0x0
 ; CHECK-NEXT: sub    esp,0x10
 ; CHECK-NEXT: mov    ebx,esp
-; CHECK-NEXT: mov    DWORD PTR [ecx],eax
 ; CHECK-NEXT: mov    DWORD PTR [edx],eax
-; CHECK-NEXT: mov    DWORD PTR [ebp-0x24],eax
+; CHECK-NEXT: mov    DWORD PTR [ecx],eax
 ; CHECK-NEXT: mov    DWORD PTR [ebp-0x14],eax
+; CHECK-NEXT: mov    DWORD PTR [ebp-0x24],eax
 ; CHECK-NEXT: mov    DWORD PTR [ebx],eax
 ; CHECK-NEXT: mov    esp,ebp
 ; CHECK-NEXT: pop    ebp