Subzero. ARM32. Nonsfi.

Adds nonsfi support to the ARM32 backend.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1665263003 .
diff --git a/Makefile.standalone b/Makefile.standalone
index 4fbe9ea..43b1e80 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -447,7 +447,6 @@
           -i x8664,native,sse4.1,test_vector_ops \
           -i x8664,sandbox,sse4.1,Om1 \
           -i arm32,neon \
-          -e arm32,nonsfi \
           -e arm32,neon,test_vector_ops \
           -e arm32,neon,test_select
 	PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
diff --git a/crosstest/test_cast_to_u1.ll b/crosstest/test_cast_to_u1.ll
index 31c1f07..3e4f07b 100644
--- a/crosstest/test_cast_to_u1.ll
+++ b/crosstest/test_cast_to_u1.ll
@@ -76,15 +76,17 @@
 define i32 @_Z4castIdbET0_T_(double %a) {
 entry:
 ;  %tobool = fcmp une double %a, 0.000000e+00
-  %tobool = fptoui double %a to i1
-  %tobool.ret_ext = zext i1 %tobool to i32
+  %tobool = fptoui double %a to i32
+  %tobool.i1 = trunc i32 %tobool to i1
+  %tobool.ret_ext = zext i1 %tobool.i1 to i32
   ret i32 %tobool.ret_ext
 }
 
 define i32 @_Z4castIfbET0_T_(float %a) {
 entry:
 ;  %tobool = fcmp une float %a, 0.000000e+00
-  %tobool = fptoui float %a to i1
-  %tobool.ret_ext = zext i1 %tobool to i32
+  %tobool = fptoui float %a to i32
+  %tobool.i1 = trunc i32 %tobool to i1
+  %tobool.ret_ext = zext i1 %tobool.i1 to i32
   ret i32 %tobool.ret_ext
 }
diff --git a/pydir/build-runtime.py b/pydir/build-runtime.py
index c735e85..2d5cf3d 100755
--- a/pydir/build-runtime.py
+++ b/pydir/build-runtime.py
@@ -23,6 +23,7 @@
             '-O2',
             '-filetype=obj',
             '-bitcode-format=llvm',
+            '-arm-enable-dwarf-eh=1',
             '-o', obj
     ] + extra_args, echo=verbose)
   shellcmd(['le32-nacl-objcopy',
diff --git a/pydir/crosstest.py b/pydir/crosstest.py
index dc2da2b..306f894 100755
--- a/pydir/crosstest.py
+++ b/pydir/crosstest.py
@@ -177,6 +177,7 @@
                   obj_sz])
         objs.append(obj_sz)
         shellcmd(['{bin}/pnacl-llc'.format(bin=bindir),
+                  '-arm-enable-dwarf-eh=1',
                   '-mtriple=' + triple,
                   '-externalize',
                   '-filetype=obj',
@@ -237,7 +238,7 @@
     compiler = '{bin}/{prefix}{cc}'.format(
         bin=bindir, prefix='pnacl-',
         cc='clang' if pure_c else 'clang++')
-    shellcmd([compiler,
+    shellcmd([compiler] + target_params + [
               args.driver,
               '-O2',
               '-o', bitcode_nonfinal,
@@ -253,6 +254,7 @@
               '-disable-opt',
               bitcode_nonfinal, '-S', '-o', bitcode])
     shellcmd(['{bin}/pnacl-llc'.format(bin=bindir),
+              '-arm-enable-dwarf-eh=1',
               '-mtriple=' + triple,
               '-externalize',
               '-filetype=obj',
diff --git a/pydir/crosstest_generator.py b/pydir/crosstest_generator.py
index 5dbaecb..8bc8f8d 100755
--- a/pydir/crosstest_generator.py
+++ b/pydir/crosstest_generator.py
@@ -42,6 +42,13 @@
   prefix = arch_map[target]
   return (prefix + ' ' + run_cmd) if prefix else run_cmd
 
+def NonsfiLoaderArch(target):
+  """Returns the arch for the nonsfi_loader"""
+  arch_map = { 'arm32' : 'arm',
+               'x8632' : 'x86-32',
+             }
+  return arch_map[target]
+
 
 def main():
   """Framework for cross test generation and execution.
@@ -176,8 +183,11 @@
               if sb == 'sandbox':
                 run_cmd = '{root}/run.py -q '.format(root=root) + run_cmd
               elif sb == 'nonsfi':
-                run_cmd = ('{root}/scons-out/opt-linux-x86-32/obj/src/nonsfi/' +
-                           'loader/nonsfi_loader ').format(root=root) + run_cmd
+                run_cmd = (
+                    '{root}/scons-out/opt-linux-{arch}/obj/src/nonsfi/' +
+                    'loader/nonsfi_loader ').format(
+                        root=root, arch=NonsfiLoaderArch(target)) + run_cmd
+                run_cmd = RunNativePrefix(args.toolchain_root, target, run_cmd)
               else:
                 run_cmd = RunNativePrefix(args.toolchain_root, target, run_cmd)
               if args.lit:
diff --git a/runtime/szrt.c b/runtime/szrt.c
index ec6b8cd..7ac5030 100644
--- a/runtime/szrt.c
+++ b/runtime/szrt.c
@@ -63,3 +63,4 @@
 //   memset  - call @llvm.memset.p0i8.i32
 // unsandboxed_irt:
 //   __nacl_read_tp
+//   __aeabi_read_tp [arm32 only]
diff --git a/runtime/szrt_asm_arm32.s b/runtime/szrt_asm_arm32.s
index 1d01909..54cb380 100644
--- a/runtime/szrt_asm_arm32.s
+++ b/runtime/szrt_asm_arm32.s
@@ -14,3 +14,6 @@
 
 	.text
 	.p2alignl 4,0xE7FEDEF0
+	.globl	__nacl_read_tp
+__nacl_read_tp:
+	b	__aeabi_read_tp
diff --git a/src/IceAssembler.cpp b/src/IceAssembler.cpp
index 28851f9..88c35d7 100644
--- a/src/IceAssembler.cpp
+++ b/src/IceAssembler.cpp
@@ -143,6 +143,12 @@
                          Buffer.size());
 }
 
+void Assembler::bindRelocOffset(RelocOffset *Offset) {
+  if (!getPreliminary()) {
+    Offset->setOffset(Buffer.getPosition());
+  }
+}
+
 void Assembler::emitIASBytes(GlobalContext *Ctx) const {
   Ostream &Str = Ctx->getStrEmit();
   intptr_t EndPosition = Buffer.size();
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index eda2f4b..b25d9b4 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -311,6 +311,8 @@
     return Buffer.createTextFixup(Text, BytesUsed);
   }
 
+  void bindRelocOffset(RelocOffset *Offset);
+
   void setNeedsTextFixup() { Buffer.setNeedsTextFixup(); }
   void resetNeedsTextFixup() { Buffer.resetNeedsTextFixup(); }
 
diff --git a/src/IceAssemblerARM32.cpp b/src/IceAssemblerARM32.cpp
index a9ef88e..f4e9855 100644
--- a/src/IceAssemblerARM32.cpp
+++ b/src/IceAssemblerARM32.cpp
@@ -609,12 +609,13 @@
     return InstARM32::InstSize;
   Ostream &Str = Ctx->getStrEmit();
   IValueT Inst = Asm.load<IValueT>(position());
+  const bool IsMovw = kind() == llvm::ELF::R_ARM_MOVW_ABS_NC ||
+                      kind() == llvm::ELF::R_ARM_MOVW_PREL_NC;
   Str << "\t"
-         "mov" << (kind() == llvm::ELF::R_ARM_MOVW_ABS_NC ? "w" : "t") << "\t"
+         "mov" << (IsMovw ? "w" : "t") << "\t"
       << RegARM32::getRegName(RegNumT::fixme((Inst >> kRdShift) & 0xF))
-      << ", #:" << (kind() == llvm::ELF::R_ARM_MOVW_ABS_NC ? "lower" : "upper")
-      << "16:" << symbol(Ctx, &Asm) << "\t@ .word "
-      << llvm::format_hex_no_prefix(Inst, 8) << "\n";
+      << ", #:" << (IsMovw ? "lower" : "upper") << "16:" << symbol(Ctx, &Asm)
+      << "\t@ .word " << llvm::format_hex_no_prefix(Inst, 8) << "\n";
   return InstARM32::InstSize;
 }
 
@@ -625,8 +626,7 @@
 
   const IValueT Inst = Asm->load<IValueT>(position());
   constexpr IValueT Imm16Mask = 0x000F0FFF;
-  const IValueT Imm16 =
-      offset() >> (kind() == llvm::ELF::R_ARM_MOVW_ABS_NC ? 0 : 16) & 0xffff;
+  const IValueT Imm16 = offset() & 0xffff;
   Asm->store(position(),
              (Inst & ~Imm16Mask) | ((Imm16 >> 12) << 16) | (Imm16 & 0xfff));
 }
@@ -635,8 +635,10 @@
                                                       const Constant *Value) {
   MoveRelocatableFixup *F =
       new (allocate<MoveRelocatableFixup>()) MoveRelocatableFixup();
-  F->set_kind(IsMovW ? llvm::ELF::R_ARM_MOVW_ABS_NC
-                     : llvm::ELF::R_ARM_MOVT_ABS);
+  F->set_kind(IsMovW ? (IsNonsfi ? llvm::ELF::R_ARM_MOVW_PREL_NC
+                                 : llvm::ELF::R_ARM_MOVW_ABS_NC)
+                     : (IsNonsfi ? llvm::ELF::R_ARM_MOVT_PREL
+                                 : llvm::ELF::R_ARM_MOVT_ABS));
   F->set_value(Value);
   Buffer.installFixup(F);
   return F;
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h
index 3ae857b..397a865 100644
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
@@ -95,8 +95,8 @@
     const RegNumT FrameOrStackReg;
   };
 
-  explicit AssemblerARM32(bool use_far_branches = false)
-      : Assembler(Asm_ARM32) {
+  explicit AssemblerARM32(bool IsNonsfi, bool use_far_branches = false)
+      : Assembler(Asm_ARM32), IsNonsfi(IsNonsfi) {
     // TODO(kschimpf): Add mode if needed when branches are handled.
     (void)use_far_branches;
   }
@@ -551,6 +551,8 @@
 private:
   ENABLE_MAKE_UNIQUE;
 
+  const bool IsNonsfi;
+
   // A vector of pool-allocated x86 labels for CFG nodes.
   using LabelVector = std::vector<Label *>;
   LabelVector CfgNodeLabels;
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index ca65c0f..265085d 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -167,7 +167,6 @@
   Label *getOrCreateCfgNodeLabel(SizeT Number);
   Label *getOrCreateLocalLabel(SizeT Number);
   void bindLocalLabel(SizeT Number);
-  void bindRelocOffset(RelocOffset *Offset);
 
   bool fixupIsPCRel(FixupKind Kind) const override {
     // Currently assuming this is the only PC-rel relocation type used.
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 00856ca..19cd99c 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -106,13 +106,6 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bindRelocOffset(RelocOffset *Offset) {
-  if (!getPreliminary()) {
-    Offset->setOffset(Buffer.getPosition());
-  }
-}
-
-template <typename TraitsType>
 void AssemblerX86Base<TraitsType>::call(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 8e81614..7bdc076 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -157,6 +157,19 @@
   }
   /// @}
 
+  /// \name Manage the Globals used by this function.
+  /// @{
+  std::unique_ptr<VariableDeclarationList> getGlobalInits() {
+    return std::move(GlobalInits);
+  }
+  void addGlobal(VariableDeclaration *Global) {
+    if (GlobalInits == nullptr) {
+      GlobalInits.reset(new VariableDeclarationList);
+    }
+    GlobalInits->push_back(Global);
+  }
+  /// @}
+
   /// \name Miscellaneous accessors.
   /// @{
   TargetLowering *getTarget() const { return Target.get(); }
@@ -166,9 +179,6 @@
     return llvm::dyn_cast<T>(TargetAssembler.get());
   }
   Assembler *releaseAssembler() { return TargetAssembler.release(); }
-  std::unique_ptr<VariableDeclarationList> getGlobalInits() {
-    return std::move(GlobalInits);
-  }
   bool hasComputedFrame() const;
   bool getFocusedTiming() const { return FocusedTiming; }
   void setFocusedTiming() { FocusedTiming = true; }
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index f370d87..7627457 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -22,9 +22,9 @@
 #include "IceGlobalInits.h"
 #include "IceInst.h"
 #include "IceOperand.h"
-#include "llvm/Support/MathExtras.h"
 
-using namespace llvm::ELF;
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
 
 namespace Ice {
 
@@ -419,11 +419,13 @@
           Section->appendZeros(Str, Init->getNumBytes());
           break;
         case VariableDeclaration::Initializer::RelocInitializerKind: {
-          const auto Reloc =
+          const auto *Reloc =
               llvm::cast<VariableDeclaration::RelocInitializer>(Init.get());
           AssemblerFixup NewFixup;
           NewFixup.set_position(Section->getCurrentSize());
-          NewFixup.set_kind(RelocationKind);
+          NewFixup.set_kind(Reloc->hasFixup() ? Reloc->getFixup()
+                                              : RelocationKind);
+          assert(NewFixup.kind() != llvm::ELF::R_ARM_NONE);
           constexpr bool SuppressMangling = true;
           NewFixup.set_value(Ctx.getConstantSym(
               Reloc->getOffset(), Reloc->getDeclaration()->mangleName(&Ctx),
diff --git a/src/IceGlobalInits.h b/src/IceGlobalInits.h
index 902e335..a111536 100644
--- a/src/IceGlobalInits.h
+++ b/src/IceGlobalInits.h
@@ -20,6 +20,7 @@
 #define SUBZERO_SRC_ICEGLOBALINITS_H
 
 #include "IceDefs.h"
+#include "IceFixups.h"
 #include "IceGlobalContext.h"
 #include "IceIntrinsics.h"
 #include "IceOperand.h"
@@ -321,7 +322,16 @@
     static std::unique_ptr<RelocInitializer>
     create(const GlobalDeclaration *Declaration,
            const RelocOffsetArray &OffsetExpr) {
-      return makeUnique<RelocInitializer>(Declaration, OffsetExpr);
+      constexpr bool NoFixup = false;
+      return makeUnique<RelocInitializer>(Declaration, OffsetExpr, NoFixup);
+    }
+
+    static std::unique_ptr<RelocInitializer>
+    create(const GlobalDeclaration *Declaration,
+           const RelocOffsetArray &OffsetExpr, FixupKind Fixup) {
+      constexpr bool HasFixup = true;
+      return makeUnique<RelocInitializer>(Declaration, OffsetExpr, HasFixup,
+                                          Fixup);
     }
 
     RelocOffsetT getOffset() const {
@@ -332,6 +342,12 @@
       return Offset;
     }
 
+    bool hasFixup() const { return HasFixup; }
+    FixupKind getFixup() const {
+      assert(HasFixup);
+      return Fixup;
+    }
+
     const GlobalDeclaration *getDeclaration() const { return Declaration; }
     SizeT getNumBytes() const final { return RelocAddrSize; }
     void dump(GlobalContext *Ctx, Ostream &Stream) const final;
@@ -344,14 +360,17 @@
     ENABLE_MAKE_UNIQUE;
 
     RelocInitializer(const GlobalDeclaration *Declaration,
-                     const RelocOffsetArray &OffsetExpr)
+                     const RelocOffsetArray &OffsetExpr, bool HasFixup,
+                     FixupKind Fixup = 0)
         : Initializer(RelocInitializerKind),
           Declaration(Declaration), // The global declaration used in the reloc.
-          OffsetExpr(OffsetExpr) {}
+          OffsetExpr(OffsetExpr), HasFixup(HasFixup), Fixup(Fixup) {}
 
     const GlobalDeclaration *Declaration;
     /// The offset to add to the relocation.
     const RelocOffsetArray OffsetExpr;
+    const bool HasFixup = false;
+    const FixupKind Fixup = 0;
   };
 
   /// Models the list of initializers.
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 4a0fc4c..f01cd53 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -354,7 +354,8 @@
                                              uint32_t ModifiedImm)
     : OperandARM32Flex(kFlexFpImm, Ty), ModifiedImm(ModifiedImm) {}
 
-bool OperandARM32FlexFpImm::canHoldImm(Operand *C, uint32_t *ModifiedImm) {
+bool OperandARM32FlexFpImm::canHoldImm(const Operand *C,
+                                       uint32_t *ModifiedImm) {
   switch (C->getType()) {
   default:
     llvm::report_fatal_error("Unhandled fp constant type.");
@@ -369,7 +370,7 @@
     static constexpr uint32_t AllowedBits = a | B | bbbbb | cdefgh;
     static_assert(AllowedBits == 0xFFF80000u,
                   "Invalid mask for f32 modified immediates.");
-    const float F32 = llvm::cast<ConstantFloat>(C)->getValue();
+    const float F32 = llvm::cast<const ConstantFloat>(C)->getValue();
     const uint32_t I32 = Utils::bitCopy<uint32_t>(F32);
     if (I32 & ~AllowedBits) {
       // constant has disallowed bits.
@@ -398,7 +399,7 @@
     static constexpr uint32_t AllowedBits = a | B | bbbbbbbb | cdefgh;
     static_assert(AllowedBits == 0xFFFF0000u,
                   "Invalid mask for f64 modified immediates.");
-    const double F64 = llvm::cast<ConstantDouble>(C)->getValue();
+    const double F64 = llvm::cast<const ConstantDouble>(C)->getValue();
     const uint64_t I64 = Utils::bitCopy<uint64_t>(F64);
     if (I64 & 0xFFFFFFFFu) {
       // constant has disallowed bits.
@@ -1725,6 +1726,9 @@
 void InstARM32Label::emitIAS(const Cfg *Func) const {
   auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
   Asm->bindLocalLabel(Func, this, Number);
+  if (OffsetReloc != nullptr) {
+    Asm->bindRelocOffset(OffsetReloc);
+  }
   if (Asm->needsTextFixup())
     emitUsingTextFixup(Func);
 }
@@ -1867,6 +1871,9 @@
   if (auto *CR = llvm::dyn_cast<ConstantRelocatable>(Src0)) {
     Str << "#:lower16:";
     CR->emitWithoutPrefix(Func->getTarget());
+    if (Func->getContext()->getFlags().getUseNonsfi()) {
+      Str << " - .";
+    }
   } else {
     Src0->emit(Func);
   }
@@ -1893,6 +1900,9 @@
   if (auto *CR = llvm::dyn_cast<ConstantRelocatable>(Src1)) {
     Str << "#:upper16:";
     CR->emitWithoutPrefix(Func->getTarget());
+    if (Func->getContext()->getFlags().getUseNonsfi()) {
+      Str << " - .";
+    }
   } else {
     Src1->emit(Func);
   }
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 96a95c5..5d53afd 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -265,7 +265,7 @@
     return Operand->getKind() == static_cast<OperandKind>(kFlexFpImm);
   }
 
-  static bool canHoldImm(Operand *C, uint32_t *ModifiedImm);
+  static bool canHoldImm(const Operand *C, uint32_t *ModifiedImm);
 
   uint32_t getModifiedImm() const { return ModifiedImm; }
 
@@ -973,10 +973,13 @@
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
+  void setRelocOffset(RelocOffset *Value) { OffsetReloc = Value; }
 
 private:
   InstARM32Label(Cfg *Func, TargetARM32 *Target);
 
+  RelocOffset *OffsetReloc = nullptr;
+
   SizeT Number; // used for unique label generation.
 };
 
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index cc4e964..ada1606 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -556,6 +556,10 @@
 void ConstantRelocatable::dump(const Cfg *Func, Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
+  if (!EmitString.empty()) {
+    Str << EmitString;
+    return;
+  }
   Str << "@";
   if (Func && !SuppressMangling) {
     Str << Func->getContext()->mangleName(Name);
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 2935df9..9f05017 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -843,6 +843,10 @@
             llvm::cast<VariableDeclaration::RelocInitializer>(Init.get());
         Str << "\t" << getEmit32Directive() << "\t";
         Str << Reloc->getDeclaration()->mangleName(Ctx);
+        if (Reloc->hasFixup()) {
+          // TODO(jpp): this is ARM32 specific.
+          Str << "(GOTOFF)";
+        }
         if (RelocOffsetT Offset = Reloc->getOffset()) {
           if (Offset >= 0 || (Offset == INT32_MIN))
             Str << " + " << Offset;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index f302e35..0b12bc6 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -52,6 +52,14 @@
 
 void staticInit(::Ice::GlobalContext *Ctx) {
   ::Ice::ARM32::TargetARM32::staticInit(Ctx);
+  if (Ctx->getFlags().getUseNonsfi()) {
+    // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
+    // globals. The GOT is an external symbol (i.e., it is not defined in the
+    // pexe) so we need to register it as such so that ELF emission won't barf
+    // on an "unknown" symbol. The GOT is added to the External symbols list
+    // here because staticInit() is invoked in a single-thread context.
+    Ctx->getConstantExternSym(::Ice::GlobalOffsetTable);
+  }
 }
 
 } // end of namespace ARM32
@@ -713,11 +721,13 @@
       return;
     }
     case Intrinsics::NaClReadTP: {
-      if (NeedSandboxing) {
+      if (SandboxingType == ST_NaCl) {
         return;
       }
       static constexpr SizeT MaxArgs = 0;
-      Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp);
+      const char *ReadTP =
+          SandboxingType == ST_Nonsfi ? "__aeabi_read_tp" : H_call_read_tp;
+      Operand *TargetHelper = Ctx->getConstantExternSym(ReadTP);
       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
                                IsTargetHelperCall);
       Instr->setDeleted();
@@ -786,11 +796,150 @@
   }
 }
 
+void TargetARM32::createGotPtr() {
+  if (SandboxingType != ST_Nonsfi) {
+    return;
+  }
+  GotPtr = Func->makeVariable(IceType_i32);
+}
+
+void TargetARM32::insertGotPtrInitPlaceholder() {
+  if (SandboxingType != ST_Nonsfi) {
+    return;
+  }
+  assert(GotPtr != nullptr);
+  // We add the two placeholder instructions here. The first fakedefs T, an
+  // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
+  // This is needed because the GotPtr initialization, if needed, will require
+  // a register:
+  //
+  //   movw     reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
+  //   movt     reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
+  //   add      reg, pc, reg
+  //   mov      GotPtr, reg
+  //
+  // If GotPtr is not used, then both these pseudo-instructions are dce'd.
+  Variable *T = makeReg(IceType_i32);
+  Context.insert<InstFakeDef>(T);
+  Context.insert<InstFakeDef>(GotPtr, T);
+}
+
+IceString TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
+  const IceString &CRName = CR->getName();
+  const IceString CRGotoffName =
+      "GOTOFF$" + Func->getFunctionName() + "$" + CRName;
+  if (KnownGotoffs.count(CRGotoffName) == 0) {
+    auto *Global = VariableDeclaration::create(Ctx);
+    Global->setIsConstant(true);
+    Global->setName(CRName);
+    Global->setSuppressMangling();
+
+    auto *Gotoff = VariableDeclaration::create(Ctx);
+    constexpr auto GotFixup = R_ARM_GOTOFF32;
+    Gotoff->setIsConstant(true);
+    Gotoff->setName(CRGotoffName);
+    Gotoff->setSuppressMangling();
+    Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
+        Global, {RelocOffset::create(Ctx, 0)}, GotFixup));
+    Func->addGlobal(Gotoff);
+    KnownGotoffs.emplace(CRGotoffName);
+  }
+  return CRGotoffName;
+}
+
+void TargetARM32::materializeGotAddr(CfgNode *Node) {
+  if (SandboxingType != ST_Nonsfi) {
+    return;
+  }
+
+  // At first, we try to find the
+  //    GotPtr = def T
+  // pseudo-instruction that we placed for defining the got ptr. That
+  // instruction is not just a place-holder for defining the GotPtr (thus
+  // keeping liveness consistent), but it is also located at a point where it is
+  // safe to materialize the got addr -- i.e., before loading parameters to
+  // registers, but after moving register parameters from their home location.
+  InstFakeDef *DefGotPtr = nullptr;
+  for (auto &Inst : Node->getInsts()) {
+    auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
+    if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
+      DefGotPtr = FakeDef;
+      break;
+    }
+  }
+
+  if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
+    return;
+  }
+
+  // The got addr needs to be materialized at the same point where DefGotPtr
+  // lives.
+  Context.setInsertPoint(DefGotPtr);
+  assert(DefGotPtr->getSrcSize() == 1);
+  auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
+  loadNamedConstantRelocatablePIC(GlobalOffsetTable, T,
+                                  [this, T](Variable *PC) { _add(T, PC, T); });
+  _mov(GotPtr, T);
+  DefGotPtr->setDeleted();
+}
+
+void TargetARM32::loadNamedConstantRelocatablePIC(
+    const IceString &Name, Variable *Register,
+    std::function<void(Variable *PC)> Finish, bool SuppressMangling) {
+  assert(SandboxingType == ST_Nonsfi);
+  // We makeReg() here instead of getPhysicalRegister() because the latter ends
+  // up creating multi-blocks temporaries that liveness fails to validate.
+  auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
+
+  auto *AddPcReloc = RelocOffset::create(Ctx);
+  AddPcReloc->setSubtract(true);
+  auto *AddPcLabel = InstARM32Label::create(Func, this);
+  AddPcLabel->setRelocOffset(AddPcReloc);
+
+  const IceString EmitText = Name;
+  // We need a -8 in the relocation expression to account for the pc's value
+  // read by the first instruction emitted in Finish(PC).
+  auto *Imm8 = RelocOffset::create(Ctx, -8);
+
+  auto *MovwReloc = RelocOffset::create(Ctx);
+  auto *MovwLabel = InstARM32Label::create(Func, this);
+  MovwLabel->setRelocOffset(MovwReloc);
+
+  auto *MovtReloc = RelocOffset::create(Ctx);
+  auto *MovtLabel = InstARM32Label::create(Func, this);
+  MovtLabel->setRelocOffset(MovtReloc);
+
+  // The EmitStrings for these constant relocatables have hardcoded offsets
+  // attached to them. This could be dangerous if, e.g., we ever implemented
+  // instruction scheduling but llvm-mc currently does not support
+  //
+  //   movw reg, #:lower16:(Symbol - Label - Number)
+  //   movt reg, #:upper16:(Symbol - Label - Number)
+  //
+  // relocations.
+  auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,
+                                      EmitText + " -16", SuppressMangling);
+  auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,
+                                      EmitText + " -12", SuppressMangling);
+
+  Context.insert(MovwLabel);
+  _movw(Register, CRLower);
+  Context.insert(MovtLabel);
+  _movt(Register, CRUpper);
+  // PC = fake-def to keep liveness consistent.
+  Context.insert<InstFakeDef>(PC);
+  Context.insert(AddPcLabel);
+  Finish(PC);
+}
+
 void TargetARM32::translateO2() {
   TimerMarker T(TimerStack::TT_O2, Func);
 
-  // TODO(stichnot): share passes with X86?
+  // TODO(stichnot): share passes with other targets?
   // https://code.google.com/p/nativeclient/issues/detail?id=4094
+  if (SandboxingType == ST_Nonsfi) {
+    createGotPtr();
+  }
   genTargetHelperCalls();
   findMaxStackOutArgsSize();
 
@@ -837,6 +986,9 @@
     return;
   Func->dump("After ARM32 address mode opt");
 
+  if (SandboxingType == ST_Nonsfi) {
+    insertGotPtrInitPlaceholder();
+  }
   Func->genCode();
   if (Func->hasError())
     return;
@@ -901,7 +1053,11 @@
 void TargetARM32::translateOm1() {
   TimerMarker T(TimerStack::TT_Om1, Func);
 
-  // TODO: share passes with X86?
+  // TODO(stichnot): share passes with other targets?
+  if (SandboxingType == ST_Nonsfi) {
+    createGotPtr();
+  }
+
   genTargetHelperCalls();
   findMaxStackOutArgsSize();
 
@@ -923,6 +1079,9 @@
 
   Func->doArgLowering();
 
+  if (SandboxingType == ST_Nonsfi) {
+    insertGotPtrInitPlaceholder();
+  }
   Func->genCode();
   if (Func->hasError())
     return;
@@ -1418,6 +1577,8 @@
   if (!UsesFramePointer)
     BasicFrameOffset += SpillAreaSizeBytes;
 
+  materializeGotAddr(Node);
+
   const VarList &Args = Func->getArgs();
   size_t InArgsSizeBytes = 0;
   TargetARM32::CallingConv CC;
@@ -3540,16 +3701,20 @@
   CallTarget = legalizeToReg(CallTarget);
 
   // Copy arguments to be passed in registers to the appropriate registers.
+  CfgVector<Variable *> RegArgs;
   for (auto &FPArg : FPArgs) {
-    Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
-    Context.insert<InstFakeUse>(Reg);
+    RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
   }
   for (auto &GPRArg : GPRArgs) {
-    Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
-    // Generate a FakeUse of register arguments so that they do not get dead
-    // code eliminated as a result of the FakeKill of scratch registers after
-    // the call.
-    Context.insert<InstFakeUse>(Reg);
+    RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
+  }
+
+  // Generate a FakeUse of register arguments so that they do not get dead code
+  // eliminated as a result of the FakeKill of scratch registers after the call.
+  // These fake-uses need to be placed here to avoid argument registers from
+  // being used during the legalizeToReg() calls above.
+  for (auto *RegArg : RegArgs) {
+    Context.insert<InstFakeUse>(RegArg);
   }
 
   InstARM32Call *NewCall =
@@ -3954,12 +4119,12 @@
 #undef X
 };
 
-bool isFloatingPointZero(Operand *Src) {
-  if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
+bool isFloatingPointZero(const Operand *Src) {
+  if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
     return Utils::isPositiveZero(F32->getValue());
   }
 
-  if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
+  if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
     return Utils::isPositiveZero(F64->getValue());
   }
 
@@ -4892,7 +5057,7 @@
     llvm::report_fatal_error("memmove should have been prelowered.");
   }
   case Intrinsics::NaClReadTP: {
-    if (!NeedSandboxing) {
+    if (SandboxingType != ST_NaCl) {
       llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
     }
     Variable *TP = legalizeToReg(OperandARM32Mem::create(
@@ -5552,8 +5717,72 @@
   _trap();
 }
 
+namespace {
+// Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
+// and out-of-range fp constants need access to the GOT address.
+bool operandNeedsGot(const Operand *Opnd) {
+  if (llvm::isa<ConstantRelocatable>(Opnd)) {
+    return true;
+  }
+
+  if (llvm::isa<ConstantFloat>(Opnd)) {
+    uint32_t _;
+    return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
+  }
+
+  const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
+  if (F64 != nullptr) {
+    uint32_t _;
+    return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
+           !isFloatingPointZero(F64);
+  }
+
+  return false;
+}
+
+// Returns whether Phi needs the GOT address (which it does if any of its
+// operands needs the GOT address).
+bool phiNeedsGot(const InstPhi *Phi) {
+  if (Phi->isDeleted()) {
+    return false;
+  }
+
+  for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
+    if (operandNeedsGot(Phi->getSrc(I))) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Returns whether **any** phi in Node needs the GOT address.
+bool anyPhiInNodeNeedsGot(CfgNode *Node) {
+  for (auto &Inst : Node->getPhis()) {
+    if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
+      return true;
+    }
+  }
+  return false;
+}
+
+} // end of anonymous namespace
+
 void TargetARM32::prelowerPhis() {
-  PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
+  CfgNode *Node = Context.getNode();
+
+  if (SandboxingType == ST_Nonsfi) {
+    assert(GotPtr != nullptr);
+    if (anyPhiInNodeNeedsGot(Node)) {
+      // If any phi instruction needs the GOT address, we place a
+      //   fake-use GotPtr
+      // in Node to prevent the GotPtr's initialization from being dead code
+      // eliminated.
+      Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
+    }
+  }
+
+  PhiLowering::prelowerPhis32Bit(this, Node, Func);
 }
 
 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
@@ -5716,8 +5945,18 @@
       }
     } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
       Variable *Reg = makeReg(Ty, RegNum);
-      _movw(Reg, C);
-      _movt(Reg, C);
+      if (SandboxingType != ST_Nonsfi) {
+        _movw(Reg, C);
+        _movt(Reg, C);
+      } else {
+        auto *GotAddr = legalizeToReg(GotPtr);
+        const IceString CGotoffName = createGotoffRelocation(C);
+        loadNamedConstantRelocatablePIC(
+            CGotoffName, Reg, [this, Reg](Variable *PC) {
+              _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
+            });
+        _add(Reg, GotAddr, Reg);
+      }
       return Reg;
     } else {
       assert(isScalarFloatingType(Ty));
@@ -5744,9 +5983,17 @@
       llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
       llvm::cast<Constant>(From)->setShouldBePooled(true);
       Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
-      Variable *BaseReg = makeReg(getPointerType());
-      _movw(BaseReg, Offset);
-      _movt(BaseReg, Offset);
+      Variable *BaseReg = nullptr;
+      if (SandboxingType == ST_Nonsfi) {
+        // vldr does not support the [base, index] addressing mode, so we need
+        // to legalize Offset to a register. Otherwise, we could simply
+        //   vldr dest, [got, reg(Offset)]
+        BaseReg = legalizeToReg(Offset);
+      } else {
+        BaseReg = makeReg(getPointerType());
+        _movw(BaseReg, Offset);
+        _movt(BaseReg, Offset);
+      }
       From = formMemoryOperand(BaseReg, Ty);
       return copyToReg(From, RegNum);
     }
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 02c13fc..267c766 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -24,6 +24,8 @@
 
 #include "llvm/ADT/SmallBitVector.h"
 
+#include <unordered_set>
+
 namespace Ice {
 namespace ARM32 {
 
@@ -64,7 +66,8 @@
   }
 
   std::unique_ptr<::Ice::Assembler> createAssembler() const override {
-    return makeUnique<ARM32::AssemblerARM32>();
+    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
+    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
   }
 
   void initNodeForLowering(CfgNode *Node) override {
@@ -856,6 +859,48 @@
 
   void postLowerLegalization();
 
+  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
+  /// @{
+  void createGotPtr();
+  void insertGotPtrInitPlaceholder();
+  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
+  void materializeGotAddr(CfgNode *Node);
+  Variable *GotPtr = nullptr;
+  // TODO(jpp): use CfgLocalAllocator.
+  /// @}
+
+  /// Manages the Gotoff relocations created during the function lowering. A
+  /// single Gotoff relocation is created for each global variable used by the
+  /// function being lowered.
+  /// @{
+  // TODO(jpp): if the same global G is used in different functions, then this
+  // method will emit one G(gotoff) relocation per function.
+  IceString createGotoffRelocation(const ConstantRelocatable *CR);
+  std::unordered_set<IceString> KnownGotoffs;
+  /// @}
+
+  /// Loads the constant relocatable Name to Register. Then invoke Finish to
+  /// finish the relocatable lowering. Finish **must** use PC in its first
+  /// emitted instruction, or the relocatable in Register will contain the wrong
+  /// value.
+  //
+  // Lowered sequence:
+  //
+  // Movw:
+  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
+  // Movt:
+  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
+  //     PC = fake-def
+  // End:
+  //     Finish(PC)
+  //
+  // The -8 in movw/movt above is to account for the PC value that the first
+  // instruction emitted by Finish(PC) will read.
+  void loadNamedConstantRelocatablePIC(const IceString &Name,
+                                       Variable *Register,
+                                       std::function<void(Variable *PC)> Finish,
+                                       bool SuppressMangling = true);
+
   /// Sandboxer defines methods for ensuring that "dangerous" operations are
   /// masked during sandboxed code emission. For regular, non-sandboxed code
   /// emission, its methods are simple pass-through methods.
diff --git a/tests_lit/llvm2ice_tests/nonsfi.ll b/tests_lit/llvm2ice_tests/nonsfi.ll
index 5b80c42..cb2a25b 100644
--- a/tests_lit/llvm2ice_tests/nonsfi.ll
+++ b/tests_lit/llvm2ice_tests/nonsfi.ll
@@ -1,10 +1,14 @@
-; RUN: %p2i -i %s --filetype=obj --assemble --disassemble --args -O2 -nonsfi=1 \
-; RUN:   --ffunction-sections \
+; RUN: %p2i -i %s --target=x8632 --filetype=obj --assemble --disassemble \
+; RUN:   --args -O2 -nonsfi=1 --ffunction-sections \
 ; RUN:   | FileCheck --check-prefix=NONSFI %s
-; RUN: %p2i -i %s --filetype=obj --assemble --disassemble --args -O2 -nonsfi=0 \
-; RUN:   --ffunction-sections \
+; RUN: %p2i -i %s --target=x8632 --filetype=obj --assemble --disassemble \
+; RUN:   --args -O2 -nonsfi=0 --ffunction-sections \
 ; RUN:   | FileCheck --check-prefix=DEFAULT %s
 
+; RUN: %p2i -i %s --target=arm32 --filetype=obj --assemble --disassemble \
+; RUN:   --args -O2 -nonsfi=1 --ffunction-sections \
+; RUN:   | FileCheck --check-prefix=ARM32-NONSFI %s
+
 @G1 = internal global [4 x i8] zeroinitializer, align 4
 @G2 = internal global [4 x i8] zeroinitializer, align 4
 
@@ -20,6 +24,13 @@
 ; NONSFI: call {{.*}} R_386_PC32 {{.*}}testLoadBasic
 ; DEFAULT-LABEL: testCallRegular
 
+; ARM32-NONSFI-LABEL: testCallRegular
+; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC GOTOFF{{.*}}testLoadBasic
+; ARM32-NONSFI-NEXT: movt [[REG:r[0-9]+]], {{.*}} R_ARM_MOVT_PREL GOTOFF{{.*}}testLoadBasic
+; ARM32-NONSFI-NEXT: ldr [[GOTOFF:r[0-9]+]], [pc, [[REG]]]
+; ARM32-NONSFI-NEXT: add [[CT:r[0-9]+]], {{.*}}, [[CT]]
+; ARM32-NONSFI:      blx [[CT]]
+
 define internal double @testCallBuiltin(double %val) {
 entry:
   %result = frem double %val, %val
@@ -30,6 +41,13 @@
 ; NONSFI: call {{.*}} R_386_PC32 fmod
 ; DEFAULT-LABEL: testCallBuiltin
 
+; ARM32-NONSFI-LABEL: testCallBuiltin
+; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC GOTOFF{{.*}}fmod
+; ARM32-NONSFI-NEXT: movt [[REG:r[0-9]+]], {{.*}} R_ARM_MOVT_PREL GOTOFF{{.*}}fmod
+; ARM32-NONSFI-NEXT: ldr [[GOTOFF:r[0-9]+]], [pc, [[REG]]]
+; ARM32-NONSFI-NEXT: add [[CT:r[0-9]+]], {{.*}}, [[CT]]
+; ARM32-NONSFI:      blx [[CT]]
+
 define internal i32 @testLoadBasic() {
 entry:
   %a = bitcast [4 x i8]* @G1 to i32*
@@ -41,6 +59,14 @@
 ; NONSFI: mov {{.*}} R_386_GOTOFF {{G1|.bss}}
 ; DEFAULT-LABEL: testLoadBasic
 
+; ARM32 PIC load.
+; ARM32-NONSFI-LABEL: testLoadBasic
+; ARM32-NONSFI:      movw {{.*}} R_ARM_MOVW_PREL_NC _GLOBAL_OFFSET_TABLE_
+; ARM32-NONSFI-NEXT: movt {{.*}} R_ARM_MOVT_PREL _GLOBAL_OFFSET_TABLE_
+; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC {{.*}}G1
+; ARM32-NONSFI-NEXT: movt [[REG]], {{.*}} R_ARM_MOVT_PREL {{.*}}G1
+; ARM32-NONSFI-NEXT: ldr r{{[0-9]+}}, [pc, [[REG]]]
+
 define internal i32 @testLoadFixedOffset() {
 entry:
   %a = ptrtoint [4 x i8]* @G1 to i32
@@ -54,6 +80,15 @@
 ; NONSFI: mov {{.*}}+0x4] {{.*}} R_386_GOTOFF {{G1|.bss}}
 ; DEFAULT-LABEL: testLoadFixedOffset
 
+; ARM32-NONSFI-LABEL: testLoadFixedOffset
+; ARM32-NONSFI:      movw [[GOT:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC _GLOBAL_OFFSET_TABLE_
+; ARM32-NONSFI-NEXT: movt [[GOT]], {{.*}} R_ARM_MOVT_PREL _GLOBAL_OFFSET_TABLE_
+; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC {{.*}}G1
+; ARM32-NONSFI-NEXT: movt [[REG]], {{.*}} R_ARM_MOVT_PREL {{.*}}G1
+; ARM32-NONSFI-NEXT: ldr [[ADDR:r[0-9]+]], [pc, [[REG]]]
+; ARM32-NONSFI-NEXT: add [[G1BASE:r[0-9]+]], [[GOT]], [[ADDR]]
+; ARM32-NONSFI-NEXT: add {{.*}}, [[G1BASE]], #4
+
 define internal i32 @testLoadIndexed(i32 %idx) {
 entry:
   %a = ptrtoint [4 x i8]* @G1 to i32
@@ -70,6 +105,15 @@
 ; NONSFI: mov {{.*}}*4+0xc] {{.*}} R_386_GOTOFF {{G1|.bss}}
 ; DEFAULT-LABEL: testLoadIndexed
 
+; ARM32-NONSFI-LABEL: testLoadIndexed
+; ARM32-NONSFI:      movw [[GOT:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC _GLOBAL_OFFSET_TABLE_
+; ARM32-NONSFI-NEXT: movt [[GOT]], {{.*}} R_ARM_MOVT_PREL _GLOBAL_OFFSET_TABLE_
+; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC {{.*}}G1
+; ARM32-NONSFI-NEXT: movt [[REG]], {{.*}} R_ARM_MOVT_PREL {{.*}}G1
+; ARM32-NONSFI-NEXT: ldr [[ADDR:r[0-9]+]], [pc, [[REG]]]
+; ARM32-NONSFI-NEXT: add [[G1BASE:r[0-9]+]], [[GOT]], [[ADDR]]
+; ARM32-NONSFI-NEXT: add {{.*}}, [[G1BASE]]
+
 define internal i32 @testLoadIndexedBase(i32 %base, i32 %idx) {
 entry:
   %a = ptrtoint [4 x i8]* @G1 to i32