| //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 
 | // | 
 | //                        The Subzero Code Generator | 
 | // | 
 | // This file is distributed under the University of Illinois Open Source | 
 | // License. See LICENSE.TXT for details. | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 | /// | 
 | /// \file | 
 | /// \brief Implements the TargetLoweringARM32 class, which consists almost | 
 | /// entirely of the lowering sequence for each high-level instruction. | 
 | /// | 
 | //===----------------------------------------------------------------------===// | 
 | #include "IceTargetLoweringARM32.h" | 
 |  | 
 | #include "IceCfg.h" | 
 | #include "IceCfgNode.h" | 
 | #include "IceClFlags.h" | 
 | #include "IceDefs.h" | 
 | #include "IceELFObjectWriter.h" | 
 | #include "IceGlobalInits.h" | 
 | #include "IceInstARM32.def" | 
 | #include "IceInstARM32.h" | 
 | #include "IceInstVarIter.h" | 
 | #include "IceLiveness.h" | 
 | #include "IceOperand.h" | 
 | #include "IcePhiLoweringImpl.h" | 
 | #include "IceRegistersARM32.h" | 
 | #include "IceTargetLoweringARM32.def" | 
 | #include "IceUtils.h" | 
 | #include "llvm/Support/MathExtras.h" | 
 |  | 
 | #include <algorithm> | 
 | #include <array> | 
 | #include <utility> | 
 |  | 
 | namespace ARM32 { | 
 | std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) { | 
 |   return ::Ice::ARM32::TargetARM32::create(Func); | 
 | } | 
 |  | 
 | std::unique_ptr<::Ice::TargetDataLowering> | 
 | createTargetDataLowering(::Ice::GlobalContext *Ctx) { | 
 |   return ::Ice::ARM32::TargetDataARM32::create(Ctx); | 
 | } | 
 |  | 
 | std::unique_ptr<::Ice::TargetHeaderLowering> | 
 | createTargetHeaderLowering(::Ice::GlobalContext *Ctx) { | 
 |   return ::Ice::ARM32::TargetHeaderARM32::create(Ctx); | 
 | } | 
 |  | 
 | void staticInit(::Ice::GlobalContext *Ctx) { | 
 |   ::Ice::ARM32::TargetARM32::staticInit(Ctx); | 
 |   if (Ice::getFlags().getUseNonsfi()) { | 
 |     // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing | 
 |     // globals. The GOT is an external symbol (i.e., it is not defined in the | 
 |     // pexe) so we need to register it as such so that ELF emission won't barf | 
 |     // on an "unknown" symbol. The GOT is added to the External symbols list | 
 |     // here because staticInit() is invoked in a single-thread context. | 
 |     Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable)); | 
 |   } | 
 | } | 
 |  | 
 | bool shouldBePooled(const ::Ice::Constant *C) { | 
 |   return ::Ice::ARM32::TargetARM32::shouldBePooled(C); | 
 | } | 
 |  | 
 | ::Ice::Type getPointerType() { | 
 |   return ::Ice::ARM32::TargetARM32::getPointerType(); | 
 | } | 
 |  | 
 | } // end of namespace ARM32 | 
 |  | 
 | namespace Ice { | 
 | namespace ARM32 { | 
 |  | 
 | namespace { | 
 |  | 
 | /// SizeOf is used to obtain the size of an initializer list as a constexpr | 
/// expression. This is only needed until our C++ library is updated to
/// C++14, which adds constexpr members to std::initializer_list.
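///
/// For illustration (made-up values): (SizeOf(1, 2, 3)).size() evaluates to 3
/// at compile time, and (SizeOf()).size() evaluates to 0. This is how the
/// register tables below compute the length of each alias_init list.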
 | class SizeOf { | 
 |   SizeOf(const SizeOf &) = delete; | 
 |   SizeOf &operator=(const SizeOf &) = delete; | 
 |  | 
 | public: | 
 |   constexpr SizeOf() : Size(0) {} | 
 |   template <typename... T> | 
 |   explicit constexpr SizeOf(T...) | 
 |       : Size(__length<T...>::value) {} | 
 |   constexpr SizeT size() const { return Size; } | 
 |  | 
 | private: | 
 |   template <typename T, typename... U> struct __length { | 
 |     static constexpr std::size_t value = 1 + __length<U...>::value; | 
 |   }; | 
 |  | 
 |   template <typename T> struct __length<T> { | 
 |     static constexpr std::size_t value = 1; | 
 |   }; | 
 |  | 
 |   const std::size_t Size; | 
 | }; | 
 |  | 
 | } // end of anonymous namespace | 
 |  | 
 | // Defines the RegARM32::Table table with register information. | 
 | RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = { | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
 |   {                                                                            \ | 
 |     name, encode, cc_arg, scratch, preserved, stackptr, frameptr, isGPR,       \ | 
 |         isInt, isI64Pair, isFP32, isFP64, isVec128,                            \ | 
 |         (SizeOf alias_init).size(), alias_init                                 \ | 
 |   }                                                                            \ | 
 |   , | 
 |     REGARM32_TABLE | 
 | #undef X | 
 | }; | 
 |  | 
 | namespace { | 
 |  | 
// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 condition code, which is then used to predicate a conditional move.
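//
// For example (illustrative only), InstIcmp::Slt maps to the LT condition, so
// a signed i32 "dest = (a < b)" can be lowered roughly as:
//
//   cmp    a, b
//   movlt  dest, #1
//   movge  dest, #0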
 |  | 
 | const struct TableIcmp32_ { | 
 |   CondARM32::Cond Mapping; | 
 | } TableIcmp32[] = { | 
 | #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \ | 
 |   { CondARM32::C_32 }                                                          \ | 
 |   , | 
 |     ICMPARM32_TABLE | 
 | #undef X | 
 | }; | 
 |  | 
 | // The following table summarizes the logic for lowering the icmp instruction | 
// for the i64 type. Two conditional moves set the destination to 1 or 0.
 | // The operands may need to be swapped, and there is a slight difference for | 
 | // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). | 
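//
// As a rough illustration (not necessarily the exact sequence emitted), an
// unsigned i64 "a < b" can be lowered by comparing the high words first and
// only comparing the low words when the high words are equal:
//
//   cmp    a.hi, b.hi
//   cmpeq  a.lo, b.lo
//   movlo  dest, #1
//   movhs  dest, #0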
 | const struct TableIcmp64_ { | 
 |   bool IsSigned; | 
 |   bool Swapped; | 
 |   CondARM32::Cond C1, C2; | 
 | } TableIcmp64[] = { | 
 | #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \ | 
 |   { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \ | 
 |   , | 
 |     ICMPARM32_TABLE | 
 | #undef X | 
 | }; | 
 |  | 
 | CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { | 
 |   assert(Cond < llvm::array_lengthof(TableIcmp32)); | 
 |   return TableIcmp32[Cond].Mapping; | 
 | } | 
 |  | 
 | // In some cases, there are x-macros tables for both high-level and low-level | 
 | // instructions/operands that use the same enum key value. The tables are kept | 
 | // separate to maintain a proper separation between abstraction layers. There | 
 | // is a risk that the tables could get out of sync if enum values are reordered | 
 | // or if entries are added or deleted. The following anonymous namespaces use | 
 | // static_asserts to ensure everything is kept in sync. | 
 |  | 
 | // Validate the enum values in ICMPARM32_TABLE. | 
 | namespace { | 
 | // Define a temporary set of enum values based on low-level table entries. | 
 | enum _icmp_ll_enum { | 
 | #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \ | 
 |   _icmp_ll_##val, | 
 |   ICMPARM32_TABLE | 
 | #undef X | 
 |       _num | 
 | }; | 
 | // Define a set of constants based on high-level table entries. | 
 | #define X(tag, reverse, str)                                                   \ | 
 |   static constexpr int _icmp_hl_##tag = InstIcmp::tag; | 
 | ICEINSTICMP_TABLE | 
 | #undef X | 
 | // Define a set of constants based on low-level table entries, and ensure the | 
 | // table entry keys are consistent. | 
 | #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \ | 
 |   static_assert(                                                               \ | 
 |       _icmp_ll_##val == _icmp_hl_##val,                                        \ | 
 |       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val); | 
 | ICMPARM32_TABLE | 
 | #undef X | 
 | // Repeat the static asserts with respect to the high-level table entries in | 
 | // case the high-level table has extra entries. | 
 | #define X(tag, reverse, str)                                                   \ | 
 |   static_assert(                                                               \ | 
 |       _icmp_hl_##tag == _icmp_ll_##tag,                                        \ | 
 |       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag); | 
 | ICEINSTICMP_TABLE | 
 | #undef X | 
 | } // end of anonymous namespace | 
 |  | 
 | // Stack alignment | 
 | const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; | 
 |  | 
 | // Value is in bytes. Return Value adjusted to the next highest multiple of the | 
 | // stack alignment. | 
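// For example, with the 16-byte ARM32_STACK_ALIGNMENT_BYTES, a Value of 20
// rounds up to 32, while a Value of 32 is already aligned and is unchanged.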
 | uint32_t applyStackAlignment(uint32_t Value) { | 
 |   return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); | 
 | } | 
 |  | 
 | // Value is in bytes. Return Value adjusted to the next highest multiple of the | 
 | // stack alignment required for the given type. | 
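// For example, aligning Value = 6 for an i32 (natural alignment 4) yields 8;
// for a 128-bit vector the alignment used here is capped at 8 bytes, so
// Value = 6 likewise yields 8 rather than 16.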
 | uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { | 
 |   // Use natural alignment, except that normally (non-NaCl) ARM only aligns | 
 |   // vectors to 8 bytes. | 
 |   // TODO(jvoung): Check this ... | 
 |   size_t typeAlignInBytes = typeWidthInBytes(Ty); | 
 |   if (isVectorType(Ty)) | 
 |     typeAlignInBytes = 8; | 
 |   return Utils::applyAlignment(Value, typeAlignInBytes); | 
 | } | 
 |  | 
 | // Conservatively check if at compile time we know that the operand is | 
 | // definitely a non-zero integer. | 
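// For example, a ConstantInteger32 with value 4 yields true, while a
// variable, a relocatable, or a null operand conservatively yields false.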
 | bool isGuaranteedNonzeroInt(const Operand *Op) { | 
 |   if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) { | 
 |     return Const->getValue() != 0; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | } // end of anonymous namespace | 
 |  | 
 | TargetARM32Features::TargetARM32Features(const ClFlags &Flags) { | 
 |   static_assert( | 
 |       (ARM32InstructionSet::End - ARM32InstructionSet::Begin) == | 
 |           (TargetInstructionSet::ARM32InstructionSet_End - | 
 |            TargetInstructionSet::ARM32InstructionSet_Begin), | 
 |       "ARM32InstructionSet range different from TargetInstructionSet"); | 
 |   if (Flags.getTargetInstructionSet() != | 
 |       TargetInstructionSet::BaseInstructionSet) { | 
 |     InstructionSet = static_cast<ARM32InstructionSet>( | 
 |         (Flags.getTargetInstructionSet() - | 
 |          TargetInstructionSet::ARM32InstructionSet_Begin) + | 
 |         ARM32InstructionSet::Begin); | 
 |   } | 
 | } | 
 |  | 
 | namespace { | 
 | constexpr SizeT NumGPRArgs = | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
 |   +(((cc_arg) > 0) ? 1 : 0) | 
 |     REGARM32_GPR_TABLE | 
 | #undef X | 
 |     ; | 
 | std::array<RegNumT, NumGPRArgs> GPRArgInitializer; | 
 |  | 
 | constexpr SizeT NumI64Args = | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
 |   +(((cc_arg) > 0) ? 1 : 0) | 
 |     REGARM32_I64PAIR_TABLE | 
 | #undef X | 
 |     ; | 
 | std::array<RegNumT, NumI64Args> I64ArgInitializer; | 
 |  | 
 | constexpr SizeT NumFP32Args = | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
 |   +(((cc_arg) > 0) ? 1 : 0) | 
 |     REGARM32_FP32_TABLE | 
 | #undef X | 
 |     ; | 
 | std::array<RegNumT, NumFP32Args> FP32ArgInitializer; | 
 |  | 
 | constexpr SizeT NumFP64Args = | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
 |   +(((cc_arg) > 0) ? 1 : 0) | 
 |     REGARM32_FP64_TABLE | 
 | #undef X | 
 |     ; | 
 | std::array<RegNumT, NumFP64Args> FP64ArgInitializer; | 
 |  | 
 | constexpr SizeT NumVec128Args = | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
  +(((cc_arg) > 0) ? 1 : 0)
 |     REGARM32_VEC128_TABLE | 
 | #undef X | 
 |     ; | 
 | std::array<RegNumT, NumVec128Args> Vec128ArgInitializer; | 
 |  | 
 | const char *getRegClassName(RegClass C) { | 
 |   auto ClassNum = static_cast<RegARM32::RegClassARM32>(C); | 
 |   assert(ClassNum < RegARM32::RCARM32_NUM); | 
 |   switch (ClassNum) { | 
 |   default: | 
 |     assert(C < RC_Target); | 
 |     return regClassString(C); | 
 |   // Add handling of new register classes below. | 
 |   case RegARM32::RCARM32_QtoS: | 
 |     return "QtoS"; | 
 |   } | 
 | } | 
 |  | 
 | } // end of anonymous namespace | 
 |  | 
 | TargetARM32::TargetARM32(Cfg *Func) | 
 |     : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl), | 
 |       CPUFeatures(getFlags()) {} | 
 |  | 
 | void TargetARM32::staticInit(GlobalContext *Ctx) { | 
 |   RegNumT::setLimit(RegARM32::Reg_NUM); | 
 |   // Limit this size (or do all bitsets need to be the same width)??? | 
 |   SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); | 
 |   SmallBitVector I64PairRegisters(RegARM32::Reg_NUM); | 
 |   SmallBitVector Float32Registers(RegARM32::Reg_NUM); | 
 |   SmallBitVector Float64Registers(RegARM32::Reg_NUM); | 
 |   SmallBitVector VectorRegisters(RegARM32::Reg_NUM); | 
 |   SmallBitVector QtoSRegisters(RegARM32::Reg_NUM); | 
 |   SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); | 
 |   const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding; | 
 |   for (int i = 0; i < RegARM32::Reg_NUM; ++i) { | 
 |     const auto &Entry = RegARM32::RegTable[i]; | 
 |     IntegerRegisters[i] = Entry.IsInt; | 
 |     I64PairRegisters[i] = Entry.IsI64Pair; | 
 |     Float32Registers[i] = Entry.IsFP32; | 
 |     Float64Registers[i] = Entry.IsFP64; | 
 |     VectorRegisters[i] = Entry.IsVec128; | 
 |     RegisterAliases[i].resize(RegARM32::Reg_NUM); | 
 |     // TODO(eholk): It would be better to store a QtoS flag in the | 
 |     // IceRegistersARM32 table than to compare their encodings here. | 
 |     QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8; | 
 |     for (int j = 0; j < Entry.NumAliases; ++j) { | 
 |       assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]); | 
 |       RegisterAliases[i].set(Entry.Aliases[j]); | 
 |     } | 
 |     assert(RegisterAliases[i][i]); | 
 |     if (Entry.CCArg <= 0) { | 
 |       continue; | 
 |     } | 
 |     const auto RegNum = RegNumT::fromInt(i); | 
 |     if (Entry.IsGPR) { | 
 |       GPRArgInitializer[Entry.CCArg - 1] = RegNum; | 
 |     } else if (Entry.IsI64Pair) { | 
 |       I64ArgInitializer[Entry.CCArg - 1] = RegNum; | 
 |     } else if (Entry.IsFP32) { | 
 |       FP32ArgInitializer[Entry.CCArg - 1] = RegNum; | 
 |     } else if (Entry.IsFP64) { | 
 |       FP64ArgInitializer[Entry.CCArg - 1] = RegNum; | 
 |     } else if (Entry.IsVec128) { | 
 |       Vec128ArgInitializer[Entry.CCArg - 1] = RegNum; | 
 |     } | 
 |   } | 
 |   TypeToRegisterSet[IceType_void] = InvalidRegisters; | 
 |   TypeToRegisterSet[IceType_i1] = IntegerRegisters; | 
 |   TypeToRegisterSet[IceType_i8] = IntegerRegisters; | 
 |   TypeToRegisterSet[IceType_i16] = IntegerRegisters; | 
 |   TypeToRegisterSet[IceType_i32] = IntegerRegisters; | 
 |   TypeToRegisterSet[IceType_i64] = I64PairRegisters; | 
 |   TypeToRegisterSet[IceType_f32] = Float32Registers; | 
 |   TypeToRegisterSet[IceType_f64] = Float64Registers; | 
 |   TypeToRegisterSet[IceType_v4i1] = VectorRegisters; | 
 |   TypeToRegisterSet[IceType_v8i1] = VectorRegisters; | 
 |   TypeToRegisterSet[IceType_v16i1] = VectorRegisters; | 
 |   TypeToRegisterSet[IceType_v16i8] = VectorRegisters; | 
 |   TypeToRegisterSet[IceType_v8i16] = VectorRegisters; | 
 |   TypeToRegisterSet[IceType_v4i32] = VectorRegisters; | 
 |   TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 
 |   TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters; | 
 |  | 
 |   for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) | 
 |     TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; | 
 |  | 
 |   filterTypeToRegisterSet(Ctx, RegARM32::Reg_NUM, TypeToRegisterSet, | 
 |                           llvm::array_lengthof(TypeToRegisterSet), | 
 |                           [](RegNumT RegNum) -> std::string { | 
 |                             // This function simply removes ", " from the | 
 |                             // register name. | 
 |                             std::string Name = RegARM32::getRegName(RegNum); | 
 |                             constexpr const char RegSeparator[] = ", "; | 
 |                             constexpr size_t RegSeparatorWidth = | 
 |                                 llvm::array_lengthof(RegSeparator) - 1; | 
 |                             for (size_t Pos = Name.find(RegSeparator); | 
 |                                  Pos != std::string::npos; | 
 |                                  Pos = Name.find(RegSeparator)) { | 
 |                               Name.replace(Pos, RegSeparatorWidth, ""); | 
 |                             } | 
 |                             return Name; | 
 |                           }, | 
 |                           getRegClassName); | 
 | } | 
 |  | 
 | namespace { | 
 | void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { | 
 |   for (Variable *Var : Vars) { | 
 |     auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); | 
 |     if (!Var64) { | 
 |       // This is not the variable we are looking for. | 
 |       continue; | 
 |     } | 
    // We only allow infinite-weight i64 temporaries to be register allocated.
 |     assert(!Var64->hasReg() || Var64->mustHaveReg()); | 
 |     if (!Var64->hasReg()) { | 
 |       continue; | 
 |     } | 
 |     const auto FirstReg = | 
 |         RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum())); | 
 |     // This assumes little endian. | 
 |     Variable *Lo = Var64->getLo(); | 
 |     Variable *Hi = Var64->getHi(); | 
 |     assert(Lo->hasReg() == Hi->hasReg()); | 
 |     if (Lo->hasReg()) { | 
 |       continue; | 
 |     } | 
 |     Lo->setRegNum(FirstReg); | 
 |     Lo->setMustHaveReg(); | 
 |     Hi->setRegNum(RegNumT::fixme(FirstReg + 1)); | 
 |     Hi->setMustHaveReg(); | 
 |   } | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) { | 
 |   TargetARM32::CallingConv CC; | 
 |   RegNumT DummyReg; | 
 |   size_t OutArgsSizeBytes = 0; | 
 |   for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) { | 
 |     Operand *Arg = legalizeUndef(Call->getArg(i)); | 
 |     const Type Ty = Arg->getType(); | 
 |     if (isScalarIntegerType(Ty)) { | 
 |       if (CC.argInGPR(Ty, &DummyReg)) { | 
 |         continue; | 
 |       } | 
 |     } else { | 
 |       if (CC.argInVFP(Ty, &DummyReg)) { | 
 |         continue; | 
 |       } | 
 |     } | 
 |  | 
 |     OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty); | 
 |     OutArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 
 |   } | 
 |  | 
 |   return applyStackAlignment(OutArgsSizeBytes); | 
 | } | 
 |  | 
 | void TargetARM32::genTargetHelperCallFor(Inst *Instr) { | 
 |   constexpr bool NoTailCall = false; | 
 |   constexpr bool IsTargetHelperCall = true; | 
 |  | 
 |   switch (Instr->getKind()) { | 
 |   default: | 
 |     return; | 
 |   case Inst::Arithmetic: { | 
 |     Variable *Dest = Instr->getDest(); | 
 |     const Type DestTy = Dest->getType(); | 
 |     const InstArithmetic::OpKind Op = | 
 |         llvm::cast<InstArithmetic>(Instr)->getOp(); | 
 |     if (isVectorType(DestTy)) { | 
 |       switch (Op) { | 
 |       default: | 
 |         break; | 
 |       case InstArithmetic::Fdiv: | 
 |       case InstArithmetic::Frem: | 
 |       case InstArithmetic::Sdiv: | 
 |       case InstArithmetic::Srem: | 
 |       case InstArithmetic::Udiv: | 
 |       case InstArithmetic::Urem: | 
 |         scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1)); | 
 |         Instr->setDeleted(); | 
 |         return; | 
 |       } | 
 |     } | 
 |     switch (DestTy) { | 
 |     default: | 
 |       return; | 
 |     case IceType_i64: { | 
 |       // Technically, ARM has its own aeabi routines, but we can use the | 
 |       // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | 
 |       // the more standard __moddi3 for rem. | 
 |       RuntimeHelper HelperID = RuntimeHelper::H_Num; | 
 |       switch (Op) { | 
 |       default: | 
 |         return; | 
 |       case InstArithmetic::Udiv: | 
 |         HelperID = RuntimeHelper::H_udiv_i64; | 
 |         break; | 
 |       case InstArithmetic::Sdiv: | 
 |         HelperID = RuntimeHelper::H_sdiv_i64; | 
 |         break; | 
 |       case InstArithmetic::Urem: | 
 |         HelperID = RuntimeHelper::H_urem_i64; | 
 |         break; | 
 |       case InstArithmetic::Srem: | 
 |         HelperID = RuntimeHelper::H_srem_i64; | 
 |         break; | 
 |       } | 
 |       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID); | 
 |       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem; | 
 |       constexpr SizeT MaxArgs = 2; | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(Instr->getSrc(0)); | 
 |       Call->addArg(Instr->getSrc(1)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case IceType_i32: | 
 |     case IceType_i16: | 
 |     case IceType_i8: { | 
 |       const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm); | 
 |       InstCast::OpKind CastKind; | 
 |       RuntimeHelper HelperID = RuntimeHelper::H_Num; | 
 |       switch (Op) { | 
 |       default: | 
 |         return; | 
 |       case InstArithmetic::Udiv: | 
 |         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32; | 
 |         CastKind = InstCast::Zext; | 
 |         break; | 
 |       case InstArithmetic::Sdiv: | 
 |         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32; | 
 |         CastKind = InstCast::Sext; | 
 |         break; | 
 |       case InstArithmetic::Urem: | 
 |         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32; | 
 |         CastKind = InstCast::Zext; | 
 |         break; | 
 |       case InstArithmetic::Srem: | 
 |         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32; | 
 |         CastKind = InstCast::Sext; | 
 |         break; | 
 |       } | 
 |       if (HelperID == RuntimeHelper::H_Num) { | 
        // HelperID should only ever be undefined when the processor has a
        // hardware divider (in which case no helper call is needed). If any
        // other helpers are ever introduced, the following assert will have
        // to be modified.
 |         assert(HasHWDiv); | 
 |         return; | 
 |       } | 
 |       Operand *Src0 = Instr->getSrc(0); | 
 |       Operand *Src1 = Instr->getSrc(1); | 
 |       if (DestTy != IceType_i32) { | 
        // Src0 and Src1 have to be zero- or sign-extended to i32. For Src0,
        // we just insert an InstCast right before the call to the helper.
 |         Variable *Src0_32 = Func->makeVariable(IceType_i32); | 
 |         Context.insert<InstCast>(CastKind, Src0_32, Src0); | 
 |         Src0 = Src0_32; | 
 |  | 
 |         // For extending Src1, we will just insert an InstCast if Src1 is not a | 
 |         // Constant. If it is, then we extend it here, and not during program | 
        // runtime. This allows preambleDivRem to optimize out the div-by-0
 |         // check. | 
 |         if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 
 |           const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24; | 
 |           int32_t NewC = C->getValue(); | 
 |           if (CastKind == InstCast::Zext) { | 
 |             NewC &= ~(0x80000000l >> ShAmt); | 
 |           } else { | 
 |             NewC = (NewC << ShAmt) >> ShAmt; | 
 |           } | 
 |           Src1 = Ctx->getConstantInt32(NewC); | 
 |         } else { | 
 |           Variable *Src1_32 = Func->makeVariable(IceType_i32); | 
 |           Context.insert<InstCast>(CastKind, Src1_32, Src1); | 
 |           Src1 = Src1_32; | 
 |         } | 
 |       } | 
 |       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID); | 
 |       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem; | 
 |       constexpr SizeT MaxArgs = 2; | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       assert(Src0->getType() == IceType_i32); | 
 |       Call->addArg(Src0); | 
 |       assert(Src1->getType() == IceType_i32); | 
 |       Call->addArg(Src1); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case IceType_f64: | 
 |     case IceType_f32: { | 
 |       if (Op != InstArithmetic::Frem) { | 
 |         return; | 
 |       } | 
 |       constexpr SizeT MaxArgs = 2; | 
 |       Operand *TargetHelper = Ctx->getRuntimeHelperFunc( | 
 |           DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32 | 
 |                                 : RuntimeHelper::H_frem_f64); | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(Instr->getSrc(0)); | 
 |       Call->addArg(Instr->getSrc(1)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     } | 
 |     llvm::report_fatal_error("Control flow should never have reached here."); | 
 |   } | 
 |   case Inst::Cast: { | 
 |     Variable *Dest = Instr->getDest(); | 
 |     Operand *Src0 = Instr->getSrc(0); | 
 |     const Type DestTy = Dest->getType(); | 
 |     const Type SrcTy = Src0->getType(); | 
 |     auto *CastInstr = llvm::cast<InstCast>(Instr); | 
 |     const InstCast::OpKind CastKind = CastInstr->getCastKind(); | 
 |  | 
 |     switch (CastKind) { | 
 |     default: | 
 |       return; | 
 |     case InstCast::Fptosi: | 
 |     case InstCast::Fptoui: { | 
 |       if (DestTy != IceType_i64) { | 
 |         return; | 
 |       } | 
 |       const bool DestIsSigned = CastKind == InstCast::Fptosi; | 
 |       const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy); | 
 |       Operand *TargetHelper = Ctx->getRuntimeHelperFunc( | 
 |           Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64 | 
 |                                     : RuntimeHelper::H_fptoui_f32_i64) | 
 |                     : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64 | 
 |                                     : RuntimeHelper::H_fptoui_f64_i64)); | 
 |       static constexpr SizeT MaxArgs = 1; | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(Src0); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case InstCast::Sitofp: | 
 |     case InstCast::Uitofp: { | 
 |       if (SrcTy != IceType_i64) { | 
 |         return; | 
 |       } | 
 |       const bool SourceIsSigned = CastKind == InstCast::Sitofp; | 
 |       const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType()); | 
 |       Operand *TargetHelper = Ctx->getRuntimeHelperFunc( | 
 |           DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32 | 
 |                                       : RuntimeHelper::H_uitofp_i64_f32) | 
 |                     : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64 | 
 |                                       : RuntimeHelper::H_uitofp_i64_f64)); | 
 |       static constexpr SizeT MaxArgs = 1; | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(Src0); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case InstCast::Bitcast: { | 
 |       if (DestTy == SrcTy) { | 
 |         return; | 
 |       } | 
 |       Variable *CallDest = Dest; | 
 |       RuntimeHelper HelperID = RuntimeHelper::H_Num; | 
 |       switch (DestTy) { | 
 |       default: | 
 |         return; | 
 |       case IceType_i8: | 
 |         assert(SrcTy == IceType_v8i1); | 
 |         HelperID = RuntimeHelper::H_bitcast_8xi1_i8; | 
 |         CallDest = Func->makeVariable(IceType_i32); | 
 |         break; | 
 |       case IceType_i16: | 
 |         assert(SrcTy == IceType_v16i1); | 
 |         HelperID = RuntimeHelper::H_bitcast_16xi1_i16; | 
 |         CallDest = Func->makeVariable(IceType_i32); | 
 |         break; | 
 |       case IceType_v8i1: { | 
 |         assert(SrcTy == IceType_i8); | 
 |         HelperID = RuntimeHelper::H_bitcast_i8_8xi1; | 
 |         Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 
 |         // Arguments to functions are required to be at least 32 bits wide. | 
 |         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); | 
 |         Src0 = Src0AsI32; | 
 |       } break; | 
 |       case IceType_v16i1: { | 
 |         assert(SrcTy == IceType_i16); | 
 |         HelperID = RuntimeHelper::H_bitcast_i16_16xi1; | 
 |         Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 
 |         // Arguments to functions are required to be at least 32 bits wide. | 
 |         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); | 
 |         Src0 = Src0AsI32; | 
 |       } break; | 
 |       } | 
 |       constexpr SizeT MaxSrcs = 1; | 
 |       InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs); | 
 |       Call->addArg(Src0); | 
 |       Context.insert(Call); | 
 |       // The PNaCl ABI disallows i8/i16 return types, so truncate the helper | 
 |       // call result to the appropriate type as necessary. | 
 |       if (CallDest->getType() != Dest->getType()) | 
 |         Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case InstCast::Trunc: { | 
 |       if (DestTy == SrcTy) { | 
 |         return; | 
 |       } | 
 |       if (!isVectorType(SrcTy)) { | 
 |         return; | 
 |       } | 
 |       assert(typeNumElements(DestTy) == typeNumElements(SrcTy)); | 
 |       assert(typeElementType(DestTy) == IceType_i1); | 
 |       assert(isVectorIntegerType(SrcTy)); | 
 |       return; | 
 |     } | 
 |     case InstCast::Sext: | 
 |     case InstCast::Zext: { | 
 |       if (DestTy == SrcTy) { | 
 |         return; | 
 |       } | 
 |       if (!isVectorType(DestTy)) { | 
 |         return; | 
 |       } | 
 |       assert(typeNumElements(DestTy) == typeNumElements(SrcTy)); | 
 |       assert(typeElementType(SrcTy) == IceType_i1); | 
 |       assert(isVectorIntegerType(DestTy)); | 
 |       return; | 
 |     } | 
 |     } | 
 |     llvm::report_fatal_error("Control flow should never have reached here."); | 
 |   } | 
 |   case Inst::IntrinsicCall: { | 
 |     Variable *Dest = Instr->getDest(); | 
 |     auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr); | 
 |     Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID; | 
 |     switch (ID) { | 
 |     default: | 
 |       return; | 
 |     case Intrinsics::Ctpop: { | 
 |       Operand *Src0 = IntrinsicCall->getArg(0); | 
 |       Operand *TargetHelper = | 
 |           Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType()) | 
 |                                         ? RuntimeHelper::H_call_ctpop_i32 | 
 |                                         : RuntimeHelper::H_call_ctpop_i64); | 
 |       static constexpr SizeT MaxArgs = 1; | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(Src0); | 
 |       Instr->setDeleted(); | 
 |       if (Src0->getType() == IceType_i64) { | 
 |         ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64; | 
 |       } | 
 |       return; | 
 |     } | 
 |     case Intrinsics::Longjmp: { | 
 |       static constexpr SizeT MaxArgs = 2; | 
 |       static constexpr Variable *NoDest = nullptr; | 
 |       Operand *TargetHelper = | 
 |           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp); | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(IntrinsicCall->getArg(0)); | 
 |       Call->addArg(IntrinsicCall->getArg(1)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case Intrinsics::Memcpy: { | 
 |       // In the future, we could potentially emit an inline memcpy/memset, etc. | 
 |       // for intrinsic calls w/ a known length. | 
 |       static constexpr SizeT MaxArgs = 3; | 
 |       static constexpr Variable *NoDest = nullptr; | 
 |       Operand *TargetHelper = | 
 |           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy); | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(IntrinsicCall->getArg(0)); | 
 |       Call->addArg(IntrinsicCall->getArg(1)); | 
 |       Call->addArg(IntrinsicCall->getArg(2)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case Intrinsics::Memmove: { | 
 |       static constexpr SizeT MaxArgs = 3; | 
 |       static constexpr Variable *NoDest = nullptr; | 
 |       Operand *TargetHelper = | 
 |           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove); | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(IntrinsicCall->getArg(0)); | 
 |       Call->addArg(IntrinsicCall->getArg(1)); | 
 |       Call->addArg(IntrinsicCall->getArg(2)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case Intrinsics::Memset: { | 
 |       // The value operand needs to be extended to a stack slot size because the | 
 |       // PNaCl ABI requires arguments to be at least 32 bits wide. | 
 |       Operand *ValOp = IntrinsicCall->getArg(1); | 
 |       assert(ValOp->getType() == IceType_i8); | 
 |       Variable *ValExt = Func->makeVariable(stackSlotType()); | 
 |       Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp); | 
 |  | 
 |       // Technically, ARM has its own __aeabi_memset, but we can use plain | 
      // memset too. The value and size arguments need to be flipped if we ever
 |       // decide to use __aeabi_memset. | 
 |       static constexpr SizeT MaxArgs = 3; | 
 |       static constexpr Variable *NoDest = nullptr; | 
 |       Operand *TargetHelper = | 
 |           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset); | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(IntrinsicCall->getArg(0)); | 
 |       Call->addArg(ValExt); | 
 |       Call->addArg(IntrinsicCall->getArg(2)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case Intrinsics::NaClReadTP: { | 
 |       if (SandboxingType == ST_NaCl) { | 
 |         return; | 
 |       } | 
 |       static constexpr SizeT MaxArgs = 0; | 
 |       Operand *TargetHelper = | 
 |           SandboxingType == ST_Nonsfi | 
 |               ? Ctx->getConstantExternSym( | 
 |                     Ctx->getGlobalString("__aeabi_read_tp")) | 
 |               : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp); | 
 |       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, | 
 |                                IsTargetHelperCall); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     case Intrinsics::Setjmp: { | 
 |       static constexpr SizeT MaxArgs = 1; | 
 |       Operand *TargetHelper = | 
 |           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp); | 
 |       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 
 |                                             NoTailCall, IsTargetHelperCall); | 
 |       Call->addArg(IntrinsicCall->getArg(0)); | 
 |       Instr->setDeleted(); | 
 |       return; | 
 |     } | 
 |     } | 
 |     llvm::report_fatal_error("Control flow should never have reached here."); | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::findMaxStackOutArgsSize() { | 
 |   // MinNeededOutArgsBytes should be updated if the Target ever creates a | 
 |   // high-level InstCall that requires more stack bytes. | 
 |   constexpr size_t MinNeededOutArgsBytes = 0; | 
 |   MaxOutArgsSizeBytes = MinNeededOutArgsBytes; | 
 |   for (CfgNode *Node : Func->getNodes()) { | 
 |     Context.init(Node); | 
 |     while (!Context.atEnd()) { | 
 |       PostIncrLoweringContext PostIncrement(Context); | 
 |       Inst *CurInstr = iteratorToInst(Context.getCur()); | 
 |       if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 
 |         SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 
 |         MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 
 |       } | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::createGotPtr() { | 
 |   if (SandboxingType != ST_Nonsfi) { | 
 |     return; | 
 |   } | 
 |   GotPtr = Func->makeVariable(IceType_i32); | 
 | } | 
 |  | 
 | void TargetARM32::insertGotPtrInitPlaceholder() { | 
 |   if (SandboxingType != ST_Nonsfi) { | 
 |     return; | 
 |   } | 
 |   assert(GotPtr != nullptr); | 
 |   // We add the two placeholder instructions here. The first fakedefs T, an | 
 |   // infinite-weight temporary, while the second fakedefs the GotPtr "using" T. | 
 |   // This is needed because the GotPtr initialization, if needed, will require | 
 |   // a register: | 
 |   // | 
 |   //   movw     reg, _GLOBAL_OFFSET_TABLE_ - 16 - . | 
 |   //   movt     reg, _GLOBAL_OFFSET_TABLE_ - 12 - . | 
 |   //   add      reg, pc, reg | 
 |   //   mov      GotPtr, reg | 
 |   // | 
 |   // If GotPtr is not used, then both these pseudo-instructions are dce'd. | 
 |   Variable *T = makeReg(IceType_i32); | 
 |   Context.insert<InstFakeDef>(T); | 
 |   Context.insert<InstFakeDef>(GotPtr, T); | 
 | } | 
 |  | 
 | GlobalString | 
 | TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) { | 
 |   GlobalString CRName = CR->getName(); | 
 |   GlobalString CRGotoffName = | 
 |       Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName); | 
 |   if (KnownGotoffs.count(CRGotoffName) == 0) { | 
 |     constexpr bool SuppressMangling = true; | 
 |     auto *Global = | 
 |         VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling); | 
 |     Global->setIsConstant(true); | 
 |     Global->setName(CRName); | 
 |     Func->getGlobalPool()->willNotBeEmitted(Global); | 
 |  | 
 |     auto *Gotoff = | 
 |         VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling); | 
 |     constexpr auto GotFixup = R_ARM_GOTOFF32; | 
 |     Gotoff->setIsConstant(true); | 
 |     Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create( | 
 |         Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)}, | 
 |         GotFixup)); | 
 |     Gotoff->setName(CRGotoffName); | 
 |     Func->addGlobal(Gotoff); | 
 |     KnownGotoffs.emplace(CRGotoffName); | 
 |   } | 
 |   return CRGotoffName; | 
 | } | 
 |  | 
 | void TargetARM32::materializeGotAddr(CfgNode *Node) { | 
 |   if (SandboxingType != ST_Nonsfi) { | 
 |     return; | 
 |   } | 
 |  | 
  // First, we try to find the
  //    GotPtr = def T
  // pseudo-instruction that we placed for defining the GotPtr. That
  // instruction is not just a placeholder for defining the GotPtr (thus
  // keeping liveness consistent), but it is also located at a point where it
  // is safe to materialize the GOT address -- i.e., before loading parameters
  // to registers, but after moving register parameters from their home
  // location.
 |   InstFakeDef *DefGotPtr = nullptr; | 
 |   for (auto &Inst : Node->getInsts()) { | 
 |     auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst); | 
 |     if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) { | 
 |       DefGotPtr = FakeDef; | 
 |       break; | 
 |     } | 
 |   } | 
 |  | 
 |   if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) { | 
 |     return; | 
 |   } | 
 |  | 
 |   // The got addr needs to be materialized at the same point where DefGotPtr | 
 |   // lives. | 
 |   Context.setInsertPoint(instToIterator(DefGotPtr)); | 
 |   assert(DefGotPtr->getSrcSize() == 1); | 
 |   auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0)); | 
 |   loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T, | 
 |                                   [this, T](Variable *PC) { _add(T, PC, T); }); | 
 |   _mov(GotPtr, T); | 
 |   DefGotPtr->setDeleted(); | 
 | } | 
 |  | 
 | void TargetARM32::loadNamedConstantRelocatablePIC( | 
 |     GlobalString Name, Variable *Register, | 
 |     std::function<void(Variable *PC)> Finish) { | 
 |   assert(SandboxingType == ST_Nonsfi); | 
 |   // We makeReg() here instead of getPhysicalRegister() because the latter ends | 
  // up creating multi-block temporaries that liveness fails to validate.
 |   auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc); | 
 |  | 
 |   auto *AddPcReloc = RelocOffset::create(Ctx); | 
 |   AddPcReloc->setSubtract(true); | 
 |   auto *AddPcLabel = InstARM32Label::create(Func, this); | 
 |   AddPcLabel->setRelocOffset(AddPcReloc); | 
 |  | 
 |   auto *MovwReloc = RelocOffset::create(Ctx); | 
 |   auto *MovwLabel = InstARM32Label::create(Func, this); | 
 |   MovwLabel->setRelocOffset(MovwReloc); | 
 |  | 
 |   auto *MovtReloc = RelocOffset::create(Ctx); | 
 |   auto *MovtLabel = InstARM32Label::create(Func, this); | 
 |   MovtLabel->setRelocOffset(MovtReloc); | 
 |  | 
  // The EmitStrings for these constant relocatables have hardcoded offsets
  // attached to them, which could be dangerous if, e.g., we ever implemented
  // instruction scheduling. However, llvm-mc currently does not support
 |   // | 
 |   //   movw reg, #:lower16:(Symbol - Label - Number) | 
 |   //   movt reg, #:upper16:(Symbol - Label - Number) | 
 |   // | 
 |   // relocations. | 
 |   static constexpr RelocOffsetT PcOffset = -8; | 
 |   auto *CRLower = Ctx->getConstantSymWithEmitString( | 
 |       PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16"); | 
 |   auto *CRUpper = Ctx->getConstantSymWithEmitString( | 
 |       PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12"); | 
 |  | 
 |   Context.insert(MovwLabel); | 
 |   _movw(Register, CRLower); | 
 |   Context.insert(MovtLabel); | 
 |   _movt(Register, CRUpper); | 
 |   // PC = fake-def to keep liveness consistent. | 
 |   Context.insert<InstFakeDef>(PC); | 
 |   Context.insert(AddPcLabel); | 
 |   Finish(PC); | 
 | } | 
 |  | 
 | void TargetARM32::translateO2() { | 
 |   TimerMarker T(TimerStack::TT_O2, Func); | 
 |  | 
 |   // TODO(stichnot): share passes with other targets? | 
 |   // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 
 |   if (SandboxingType == ST_Nonsfi) { | 
 |     createGotPtr(); | 
 |   } | 
 |   genTargetHelperCalls(); | 
 |   findMaxStackOutArgsSize(); | 
 |  | 
  // Merge Alloca instructions, and lay out the stack.
 |   static constexpr bool SortAndCombineAllocas = true; | 
 |   Func->processAllocas(SortAndCombineAllocas); | 
 |   Func->dump("After Alloca processing"); | 
 |  | 
 |   if (!getFlags().getEnablePhiEdgeSplit()) { | 
 |     // Lower Phi instructions. | 
 |     Func->placePhiLoads(); | 
 |     if (Func->hasError()) | 
 |       return; | 
 |     Func->placePhiStores(); | 
 |     if (Func->hasError()) | 
 |       return; | 
 |     Func->deletePhis(); | 
 |     if (Func->hasError()) | 
 |       return; | 
 |     Func->dump("After Phi lowering"); | 
 |   } | 
 |  | 
 |   // Address mode optimization. | 
 |   Func->getVMetadata()->init(VMK_SingleDefs); | 
 |   Func->doAddressOpt(); | 
 |   Func->materializeVectorShuffles(); | 
 |  | 
 |   // Argument lowering | 
 |   Func->doArgLowering(); | 
 |  | 
 |   // Target lowering. This requires liveness analysis for some parts of the | 
 |   // lowering decisions, such as compare/branch fusing. If non-lightweight | 
 |   // liveness analysis is used, the instructions need to be renumbered first. | 
 |   // TODO: This renumbering should only be necessary if we're actually | 
 |   // calculating live intervals, which we only do for register allocation. | 
 |   Func->renumberInstructions(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |  | 
 |   // TODO: It should be sufficient to use the fastest liveness calculation, | 
 |   // i.e. livenessLightweight(). However, for some reason that slows down the | 
 |   // rest of the translation. Investigate. | 
 |   Func->liveness(Liveness_Basic); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After ARM32 address mode opt"); | 
 |  | 
 |   if (SandboxingType == ST_Nonsfi) { | 
 |     insertGotPtrInitPlaceholder(); | 
 |   } | 
 |   Func->genCode(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After ARM32 codegen"); | 
 |  | 
 |   // Register allocation. This requires instruction renumbering and full | 
 |   // liveness analysis. | 
 |   Func->renumberInstructions(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->liveness(Liveness_Intervals); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   // The post-codegen dump is done here, after liveness analysis and associated | 
 |   // cleanup, to make the dump cleaner and more useful. | 
 |   Func->dump("After initial ARM32 codegen"); | 
 |   // Validate the live range computations. The expensive validation call is | 
 |   // deliberately only made when assertions are enabled. | 
 |   assert(Func->validateLiveness()); | 
 |   Func->getVMetadata()->init(VMK_All); | 
 |   regAlloc(RAK_Global); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |  | 
 |   copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | 
 |   Func->dump("After linear scan regalloc"); | 
 |  | 
 |   if (getFlags().getEnablePhiEdgeSplit()) { | 
 |     Func->advancedPhiLowering(); | 
 |     Func->dump("After advanced Phi lowering"); | 
 |   } | 
 |  | 
 |   ForbidTemporaryWithoutReg _(this); | 
 |  | 
 |   // Stack frame mapping. | 
 |   Func->genFrame(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After stack frame mapping"); | 
 |  | 
 |   postLowerLegalization(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After postLowerLegalization"); | 
 |  | 
 |   Func->contractEmptyNodes(); | 
 |   Func->reorderNodes(); | 
 |  | 
 |   // Branch optimization. This needs to be done just before code emission. In | 
 |   // particular, no transformations that insert or reorder CfgNodes should be | 
 |   // done after branch optimization. We go ahead and do it before nop insertion | 
 |   // to reduce the amount of work needed for searching for opportunities. | 
 |   Func->doBranchOpt(); | 
 |   Func->dump("After branch optimization"); | 
 |  | 
 |   // Nop insertion | 
 |   if (getFlags().getShouldDoNopInsertion()) { | 
 |     Func->doNopInsertion(); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::translateOm1() { | 
 |   TimerMarker T(TimerStack::TT_Om1, Func); | 
 |  | 
 |   // TODO(stichnot): share passes with other targets? | 
 |   if (SandboxingType == ST_Nonsfi) { | 
 |     createGotPtr(); | 
 |   } | 
 |  | 
 |   genTargetHelperCalls(); | 
 |   findMaxStackOutArgsSize(); | 
 |  | 
 |   // Do not merge Alloca instructions, and lay out the stack. | 
 |   static constexpr bool DontSortAndCombineAllocas = false; | 
 |   Func->processAllocas(DontSortAndCombineAllocas); | 
 |   Func->dump("After Alloca processing"); | 
 |  | 
 |   Func->placePhiLoads(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->placePhiStores(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->deletePhis(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After Phi lowering"); | 
 |  | 
 |   Func->doArgLowering(); | 
 |  | 
 |   if (SandboxingType == ST_Nonsfi) { | 
 |     insertGotPtrInitPlaceholder(); | 
 |   } | 
 |   Func->genCode(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After initial ARM32 codegen"); | 
 |  | 
 |   regAlloc(RAK_InfOnly); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |  | 
 |   copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | 
 |   Func->dump("After regalloc of infinite-weight variables"); | 
 |  | 
 |   ForbidTemporaryWithoutReg _(this); | 
 |  | 
 |   Func->genFrame(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After stack frame mapping"); | 
 |  | 
 |   postLowerLegalization(); | 
 |   if (Func->hasError()) | 
 |     return; | 
 |   Func->dump("After postLowerLegalization"); | 
 |  | 
 |   // Nop insertion | 
 |   if (getFlags().getShouldDoNopInsertion()) { | 
 |     Func->doNopInsertion(); | 
 |   } | 
 | } | 
 |  | 
 | uint32_t TargetARM32::getStackAlignment() const { | 
 |   return ARM32_STACK_ALIGNMENT_BYTES; | 
 | } | 
 |  | 
 | bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 
 |   if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) { | 
 |     return Br->optimizeBranch(NextNode); | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const { | 
 |   (void)Ty; | 
 |   return RegARM32::getRegName(RegNum); | 
 | } | 
 |  | 
 | Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) { | 
 |   static const Type DefaultType[] = { | 
 | #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \ | 
 |           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \ | 
 |   (isFP32)                                                                     \ | 
 |       ? IceType_f32                                                            \ | 
 |       : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))), | 
 |       REGARM32_TABLE | 
 | #undef X | 
 |   }; | 
 |  | 
 |   if (Ty == IceType_void) { | 
 |     assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType)); | 
 |     Ty = DefaultType[RegNum]; | 
 |   } | 
 |   if (PhysicalRegisters[Ty].empty()) | 
 |     PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); | 
 |   assert(unsigned(RegNum) < PhysicalRegisters[Ty].size()); | 
 |   Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 
 |   if (Reg == nullptr) { | 
 |     Reg = Func->makeVariable(Ty); | 
 |     Reg->setRegNum(RegNum); | 
 |     PhysicalRegisters[Ty][RegNum] = Reg; | 
 |     // Specially mark a named physical register as an "argument" so that it is | 
 |     // considered live upon function entry.  Otherwise it's possible to get | 
 |     // liveness validation errors for saving callee-save registers. | 
 |     Func->addImplicitArg(Reg); | 
 |     // Don't bother tracking the live range of a named physical register. | 
 |     Reg->setIgnoreLiveness(); | 
 |   } | 
 |   return Reg; | 
 | } | 
 |  | 
 | void TargetARM32::emitJumpTable(const Cfg *Func, | 
 |                                 const InstJumpTable *JumpTable) const { | 
 |   (void)Func; | 
 |   (void)JumpTable; | 
 |   UnimplementedError(getFlags()); | 
 | } | 
 |  | 
 | void TargetARM32::emitVariable(const Variable *Var) const { | 
 |   if (!BuildDefs::dump()) | 
 |     return; | 
 |   Ostream &Str = Ctx->getStrEmit(); | 
 |   if (Var->hasReg()) { | 
 |     Str << getRegName(Var->getRegNum(), Var->getType()); | 
 |     return; | 
 |   } | 
 |   if (Var->mustHaveReg()) { | 
 |     llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() + | 
 |                              ") has no register assigned - function " + | 
 |                              Func->getFunctionName()); | 
 |   } | 
 |   assert(!Var->isRematerializable()); | 
 |   int32_t Offset = Var->getStackOffset(); | 
 |   auto BaseRegNum = Var->getBaseRegNum(); | 
 |   if (BaseRegNum.hasNoValue()) { | 
 |     BaseRegNum = getFrameOrStackReg(); | 
 |   } | 
 |   const Type VarTy = Var->getType(); | 
 |   Str << "[" << getRegName(BaseRegNum, VarTy); | 
 |   if (Offset != 0) { | 
 |     Str << ", #" << Offset; | 
 |   } | 
 |   Str << "]"; | 
 | } | 
 |  | 
 | TargetARM32::CallingConv::CallingConv() | 
 |     : GPRegsUsed(RegARM32::Reg_NUM), | 
 |       GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()), | 
 |       I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()), | 
 |       VFPRegsUsed(RegARM32::Reg_NUM), | 
 |       FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()), | 
 |       FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()), | 
 |       Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {} | 
 |  | 
 | bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { | 
 |   CfgVector<RegNumT> *Source; | 
 |  | 
 |   switch (Ty) { | 
 |   default: { | 
 |     assert(isScalarIntegerType(Ty)); | 
 |     Source = &GPRArgs; | 
 |   } break; | 
 |   case IceType_i64: { | 
 |     Source = &I64Args; | 
 |   } break; | 
 |   } | 
 |  | 
 |   discardUnavailableGPRsAndTheirAliases(Source); | 
 |  | 
 |   if (Source->empty()) { | 
 |     GPRegsUsed.set(); | 
 |     return false; | 
 |   } | 
 |  | 
 |   *Reg = Source->back(); | 
  // Note that we don't Source->pop_back() here. This is intentional: we mark
  // all of Reg's aliases as used, so for the next argument Source->back() is
  // seen as unavailable and is implicitly discarded by
  // discardUnavailableGPRsAndTheirAliases().
 |   GPRegsUsed |= RegisterAliases[*Reg]; | 
 |   return true; | 
 | } | 
 |  | 
// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in r0, the second in r2-r3 (r1 is
// skipped), and the third on the stack. To model this behavior, whenever we
// pop a register from Regs, we remove all of its aliases from the pool of
// available GPRs. This has the effect of computing the "closure" of the used
// GPR registers.
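//
// Illustrative trace for that example (assuming the usual AAPCS registers and
// pair names, for illustration only): argInGPR(i32) returns r0 and marks r0
// and its aliasing i64 pair used; argInGPR(i64) then discards the r0/r1 pair
// (it aliases r0), marking r1 used as well, and returns the r2/r3 pair; the
// final argInGPR(i32) therefore finds every remaining GPR argument register
// used, and the argument goes on the stack.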
 | void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases( | 
 |     CfgVector<RegNumT> *Regs) { | 
 |   while (!Regs->empty() && GPRegsUsed[Regs->back()]) { | 
 |     GPRegsUsed |= RegisterAliases[Regs->back()]; | 
 |     Regs->pop_back(); | 
 |   } | 
 | } | 
 |  | 
 | bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) { | 
 |   CfgVector<RegNumT> *Source; | 
 |  | 
 |   switch (Ty) { | 
 |   default: { | 
 |     assert(isVectorType(Ty)); | 
 |     Source = &Vec128Args; | 
 |   } break; | 
 |   case IceType_f32: { | 
 |     Source = &FP32Args; | 
 |   } break; | 
 |   case IceType_f64: { | 
 |     Source = &FP64Args; | 
 |   } break; | 
 |   } | 
 |  | 
 |   discardUnavailableVFPRegs(Source); | 
 |  | 
 |   if (Source->empty()) { | 
 |     VFPRegsUsed.set(); | 
 |     return false; | 
 |   } | 
 |  | 
 |   *Reg = Source->back(); | 
 |   VFPRegsUsed |= RegisterAliases[*Reg]; | 
 |   return true; | 
 | } | 
 |  | 
// Arguments in VFP registers are packed, so we don't mark the popped
// registers' aliases as unavailable.
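//
// For example (illustrative): an f32 argument takes s0, which also marks d0
// used; a following f64 argument therefore skips d0 and takes d1; a later f32
// argument can still be back-filled into s1, because discarding d0 here does
// not mark s1 as used.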
 | void TargetARM32::CallingConv::discardUnavailableVFPRegs( | 
 |     CfgVector<RegNumT> *Regs) { | 
 |   while (!Regs->empty() && VFPRegsUsed[Regs->back()]) { | 
 |     Regs->pop_back(); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerArguments() { | 
 |   VarList &Args = Func->getArgs(); | 
 |   TargetARM32::CallingConv CC; | 
 |  | 
 |   // For each register argument, replace Arg in the argument list with the home | 
 |   // register. Then generate an instruction in the prolog to copy the home | 
 |   // register to the assigned location of Arg. | 
 |   Context.init(Func->getEntryNode()); | 
 |   Context.setInsertPoint(Context.getCur()); | 
 |  | 
 |   for (SizeT I = 0, E = Args.size(); I < E; ++I) { | 
 |     Variable *Arg = Args[I]; | 
 |     Type Ty = Arg->getType(); | 
 |     RegNumT RegNum; | 
 |     if (isScalarIntegerType(Ty)) { | 
 |       if (!CC.argInGPR(Ty, &RegNum)) { | 
 |         continue; | 
 |       } | 
 |     } else { | 
 |       if (!CC.argInVFP(Ty, &RegNum)) { | 
 |         continue; | 
 |       } | 
 |     } | 
 |  | 
 |     Variable *RegisterArg = Func->makeVariable(Ty); | 
 |     if (BuildDefs::dump()) { | 
 |       RegisterArg->setName(Func, "home_reg:" + Arg->getName()); | 
 |     } | 
 |     RegisterArg->setIsArg(); | 
 |     Arg->setIsArg(false); | 
 |     Args[I] = RegisterArg; | 
 |     switch (Ty) { | 
 |     default: { RegisterArg->setRegNum(RegNum); } break; | 
 |     case IceType_i64: { | 
 |       auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); | 
 |       RegisterArg64->initHiLo(Func); | 
 |       RegisterArg64->getLo()->setRegNum( | 
 |           RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum))); | 
 |       RegisterArg64->getHi()->setRegNum( | 
 |           RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum))); | 
 |     } break; | 
 |     } | 
 |     Context.insert<InstAssign>(Arg, RegisterArg); | 
 |   } | 
 | } | 
 |  | 
 | // Helper function for addProlog(). | 
 | // | 
 | // This assumes Arg is an argument passed on the stack. This sets the frame | 
 | // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 
 | // I64 arg that has been split into Lo and Hi components, it calls itself | 
 | // recursively on the components, taking care to handle Lo first because of the | 
 | // little-endian architecture. Lastly, this function generates an instruction | 
 | // to copy Arg into its assigned register if applicable. | 
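//
// For example, an i64 argument passed on the stack gets its Lo half at the
// current in-args offset and its Hi half 4 bytes above it.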
 | void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 
 |                                          size_t BasicFrameOffset, | 
 |                                          size_t *InArgsSizeBytes) { | 
 |   const Type Ty = Arg->getType(); | 
 |   *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty); | 
 |  | 
 |   if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { | 
 |     Variable *const Lo = Arg64On32->getLo(); | 
 |     Variable *const Hi = Arg64On32->getHi(); | 
 |     finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 
 |     finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 
 |     return; | 
 |   } | 
 |   assert(Ty != IceType_i64); | 
 |  | 
 |   const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes; | 
 |   *InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 
 |  | 
 |   if (!Arg->hasReg()) { | 
 |     Arg->setStackOffset(ArgStackOffset); | 
 |     return; | 
 |   } | 
 |  | 
 |   // If the argument variable has been assigned a register, we need to copy the | 
 |   // value from the stack slot. | 
 |   Variable *Parameter = Func->makeVariable(Ty); | 
 |   Parameter->setMustNotHaveReg(); | 
 |   Parameter->setStackOffset(ArgStackOffset); | 
 |   _mov(Arg, Parameter); | 
 | } | 
 |  | 
 | Type TargetARM32::stackSlotType() { return IceType_i32; } | 
 |  | 
 | void TargetARM32::addProlog(CfgNode *Node) { | 
 |   // Stack frame layout: | 
 |   // | 
 |   // +------------------------+ | 
 |   // | 1. preserved registers | | 
 |   // +------------------------+ | 
 |   // | 2. padding             | | 
 |   // +------------------------+ <--- FramePointer (if used) | 
 |   // | 3. global spill area   | | 
 |   // +------------------------+ | 
 |   // | 4. padding             | | 
 |   // +------------------------+ | 
 |   // | 5. local spill area    | | 
 |   // +------------------------+ | 
 |   // | 6. padding             | | 
 |   // +------------------------+ | 
 |   // | 7. allocas (variable)  | | 
 |   // +------------------------+ | 
 |   // | 8. padding             | | 
 |   // +------------------------+ | 
 |   // | 9. out args            | | 
 |   // +------------------------+ <--- StackPointer | 
 |   // | 
 |   // The following variables record the size in bytes of the given areas: | 
 |   //  * PreservedRegsSizeBytes: area 1 | 
 |   //  * SpillAreaPaddingBytes:  area 2 | 
 |   //  * GlobalsSize:            area 3 | 
 |   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4 | 
 |   //  * LocalsSpillAreaSize:    area 5 | 
 |   //  * SpillAreaSizeBytes:     areas 2 - 6, and 9 | 
 |   //  * MaxOutArgsSizeBytes:    area 9 | 
 |   // | 
 |   // Determine stack frame offsets for each Variable without a register | 
 |   // assignment.  This can be done as one variable per stack slot.  Or, do | 
 |   // coalescing by running the register allocator again with an infinite set of | 
 |   // registers (as a side effect, this gives variables a second chance at | 
 |   // physical register assignment). | 
 |   // | 
 |   // A middle ground approach is to leverage sparsity and allocate one block of | 
 |   // space on the frame for globals (variables with multi-block lifetime), and | 
 |   // one block to share for locals (single-block lifetime). | 
 |  | 
 |   Context.init(Node); | 
 |   Context.setInsertPoint(Context.getCur()); | 
 |  | 
 |   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None); | 
 |   RegsUsed = SmallBitVector(CalleeSaves.size()); | 
 |   VarList SortedSpilledVariables; | 
 |   size_t GlobalsSize = 0; | 
 |   // If there is a separate locals area, this represents that area. Otherwise | 
 |   // it counts any variable not counted by GlobalsSize. | 
 |   SpillAreaSizeBytes = 0; | 
 |   // If there is a separate locals area, this specifies the alignment for it. | 
 |   uint32_t LocalsSlotsAlignmentBytes = 0; | 
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
 |   uint32_t SpillAreaAlignmentBytes = 0; | 
 |   // For now, we don't have target-specific variables that need special | 
 |   // treatment (no stack-slot-linked SpillVariable type). | 
 |   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) { | 
 |     static constexpr bool AssignStackSlot = false; | 
 |     static constexpr bool DontAssignStackSlot = !AssignStackSlot; | 
 |     if (llvm::isa<Variable64On32>(Var)) { | 
 |       return DontAssignStackSlot; | 
 |     } | 
 |     return AssignStackSlot; | 
 |   }; | 
 |  | 
 |   // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 
 |   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 
 |                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 
 |                         &LocalsSlotsAlignmentBytes, TargetVarHook); | 
 |   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 
 |   SpillAreaSizeBytes += GlobalsSize; | 
 |  | 
 |   // Add push instructions for preserved registers. On ARM, "push" can push a | 
 |   // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has | 
 |   // callee-saved float/vector registers. | 
 |   // | 
 |   // The "vpush" instruction can handle a whole list of float/vector registers, | 
 |   // but it only handles contiguous sequences of registers by specifying the | 
 |   // start and the length. | 
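  //
  // For example, "vpush {s16-s19}" is a single instruction, but s16 and s18
  // alone cannot be combined into one vpush because the encoding only takes a
  // start register and a count.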
 |   PreservedGPRs.reserve(CalleeSaves.size()); | 
 |   PreservedSRegs.reserve(CalleeSaves.size()); | 
 |  | 
 |   // Consider FP and LR as callee-save / used as needed. | 
 |   if (UsesFramePointer) { | 
 |     if (RegsUsed[RegARM32::Reg_fp]) { | 
 |       llvm::report_fatal_error("Frame pointer has been used."); | 
 |     } | 
 |     CalleeSaves[RegARM32::Reg_fp] = true; | 
 |     RegsUsed[RegARM32::Reg_fp] = true; | 
 |   } | 
 |   if (!MaybeLeafFunc) { | 
 |     CalleeSaves[RegARM32::Reg_lr] = true; | 
 |     RegsUsed[RegARM32::Reg_lr] = true; | 
 |   } | 
 |  | 
 |   // Make two passes over the used registers. The first pass records all the | 
 |   // used registers -- and their aliases. Then, we figure out which GPRs and | 
  // VFP S registers should be saved. We don't bother saving D/Q registers
  // because their uses are recorded as uses of their S-register aliases.
 |   SmallBitVector ToPreserve(RegARM32::Reg_NUM); | 
 |   for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 
 |     if (NeedSandboxing && i == RegARM32::Reg_r9) { | 
 |       // r9 is never updated in sandboxed code. | 
 |       continue; | 
 |     } | 
 |     if (CalleeSaves[i] && RegsUsed[i]) { | 
 |       ToPreserve |= RegisterAliases[i]; | 
 |     } | 
 |   } | 
 |  | 
 |   uint32_t NumCallee = 0; | 
 |   size_t PreservedRegsSizeBytes = 0; | 
 |  | 
 |   // RegClasses is a tuple of | 
 |   // | 
 |   // <First Register in Class, Last Register in Class, Vector of Save Registers> | 
 |   // | 
 |   // We use this tuple to figure out which register we should push/pop during | 
 |   // prolog/epilog. | 
 |   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>; | 
 |   const RegClassType RegClasses[] = { | 
 |       RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last, | 
 |                    &PreservedGPRs), | 
 |       RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last, | 
 |                    &PreservedSRegs)}; | 
 |   for (const auto &RegClass : RegClasses) { | 
 |     const uint32_t FirstRegInClass = std::get<0>(RegClass); | 
 |     const uint32_t LastRegInClass = std::get<1>(RegClass); | 
 |     VarList *const PreservedRegsInClass = std::get<2>(RegClass); | 
 |     for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) { | 
 |       if (!ToPreserve[Reg]) { | 
 |         continue; | 
 |       } | 
 |       ++NumCallee; | 
 |       Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg)); | 
 |       PreservedRegsSizeBytes += | 
 |           typeWidthInBytesOnStack(PhysicalRegister->getType()); | 
 |       PreservedRegsInClass->push_back(PhysicalRegister); | 
 |     } | 
 |   } | 
 |  | 
 |   Ctx->statsUpdateRegistersSaved(NumCallee); | 
 |   if (!PreservedSRegs.empty()) | 
 |     _push(PreservedSRegs); | 
 |   if (!PreservedGPRs.empty()) | 
 |     _push(PreservedGPRs); | 
 |  | 
 |   // Generate "mov FP, SP" if needed. | 
 |   if (UsesFramePointer) { | 
 |     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 
 |     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 
 |     _mov(FP, SP); | 
 |     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 
 |     Context.insert<InstFakeUse>(FP); | 
 |   } | 
 |  | 
 |   // Align the variables area. SpillAreaPaddingBytes is the size of the region | 
 |   // after the preserved registers and before the spill areas. | 
 |   // LocalsSlotsPaddingBytes is the amount of padding between the globals and | 
 |   // locals area if they are separate. | 
 |   assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); | 
 |   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 
 |   uint32_t SpillAreaPaddingBytes = 0; | 
 |   uint32_t LocalsSlotsPaddingBytes = 0; | 
 |   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 
 |                        GlobalsSize, LocalsSlotsAlignmentBytes, | 
 |                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 
 |   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 
 |   uint32_t GlobalsAndSubsequentPaddingSize = | 
 |       GlobalsSize + LocalsSlotsPaddingBytes; | 
 |  | 
  // Add the out args space to the stack frame, and align SP if necessary.
 |   if (!NeedsStackAlignment) { | 
 |     SpillAreaSizeBytes += MaxOutArgsSizeBytes; | 
 |   } else { | 
 |     uint32_t StackOffset = PreservedRegsSizeBytes; | 
 |     uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 
 |     StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); | 
 |     SpillAreaSizeBytes = StackSize - StackOffset; | 
 |   } | 
 |  | 
 |   // Combine fixed alloca with SpillAreaSize. | 
 |   SpillAreaSizeBytes += FixedAllocaSizeBytes; | 
 |  | 
 |   // Generate "sub sp, SpillAreaSizeBytes" | 
 |   if (SpillAreaSizeBytes) { | 
 |     // Use the scratch register if needed to legalize the immediate. | 
 |     Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 
 |                                   Legal_Reg | Legal_Flex, getReservedTmpReg()); | 
 |     Sandboxer(this).sub_sp(SubAmount); | 
 |     if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) { | 
 |       Sandboxer(this).align_sp(FixedAllocaAlignBytes); | 
 |     } | 
 |   } | 
 |  | 
 |   Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 
 |  | 
 |   // Fill in stack offsets for stack args, and copy args into registers for | 
 |   // those that were register-allocated. Args are pushed right to left, so | 
 |   // Arg[0] is closest to the stack/frame pointer. | 
 |   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 
 |   size_t BasicFrameOffset = PreservedRegsSizeBytes; | 
 |   if (!UsesFramePointer) | 
 |     BasicFrameOffset += SpillAreaSizeBytes; | 
 |  | 
 |   materializeGotAddr(Node); | 
 |  | 
 |   const VarList &Args = Func->getArgs(); | 
 |   size_t InArgsSizeBytes = 0; | 
 |   TargetARM32::CallingConv CC; | 
 |   for (Variable *Arg : Args) { | 
 |     RegNumT DummyReg; | 
 |     const Type Ty = Arg->getType(); | 
 |  | 
 |     // Skip arguments passed in registers. | 
 |     if (isScalarIntegerType(Ty)) { | 
 |       if (CC.argInGPR(Ty, &DummyReg)) { | 
 |         continue; | 
 |       } | 
 |     } else { | 
 |       if (CC.argInVFP(Ty, &DummyReg)) { | 
 |         continue; | 
 |       } | 
 |     } | 
 |     finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes); | 
 |   } | 
 |  | 
 |   // Fill in stack offsets for locals. | 
 |   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, | 
 |                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, | 
 |                       UsesFramePointer); | 
 |   this->HasComputedFrame = true; | 
 |  | 
 |   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { | 
 |     OstreamLocker _(Func->getContext()); | 
 |     Ostream &Str = Func->getContext()->getStrDump(); | 
 |  | 
 |     Str << "Stack layout:\n"; | 
 |     uint32_t SPAdjustmentPaddingSize = | 
 |         SpillAreaSizeBytes - LocalsSpillAreaSize - | 
 |         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - | 
 |         MaxOutArgsSizeBytes; | 
 |     Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 
 |         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | 
 |         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | 
 |         << " globals spill area = " << GlobalsSize << " bytes\n" | 
 |         << " globals-locals spill areas intermediate padding = " | 
 |         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | 
 |         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | 
 |         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n"; | 
 |  | 
 |     Str << "Stack details:\n" | 
 |         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" | 
 |         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | 
 |         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" | 
 |         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | 
 |         << " bytes\n" | 
 |         << " is FP based = " << UsesFramePointer << "\n"; | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::addEpilog(CfgNode *Node) { | 
 |   InstList &Insts = Node->getInsts(); | 
 |   InstList::reverse_iterator RI, E; | 
 |   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 
 |     if (llvm::isa<InstARM32Ret>(*RI)) | 
 |       break; | 
 |   } | 
 |   if (RI == E) | 
 |     return; | 
 |  | 
 |   // Convert the reverse_iterator position into its corresponding (forward) | 
 |   // iterator position. | 
 |   InstList::iterator InsertPoint = reverseToForwardIterator(RI); | 
 |   --InsertPoint; | 
 |   Context.init(Node); | 
 |   Context.setInsertPoint(InsertPoint); | 
 |  | 
 |   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 
 |   if (UsesFramePointer) { | 
 |     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 
 |     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake | 
 |     // use of SP before the assignment of SP=FP keeps previous SP adjustments | 
 |     // from being dead-code eliminated. | 
 |     Context.insert<InstFakeUse>(SP); | 
 |     Sandboxer(this).reset_sp(FP); | 
 |   } else { | 
 |     // add SP, SpillAreaSizeBytes | 
 |     if (SpillAreaSizeBytes) { | 
 |       // Use the scratch register if needed to legalize the immediate. | 
 |       Operand *AddAmount = | 
 |           legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 
 |                    Legal_Reg | Legal_Flex, getReservedTmpReg()); | 
 |       Sandboxer(this).add_sp(AddAmount); | 
 |     } | 
 |   } | 
 |  | 
 |   if (!PreservedGPRs.empty()) | 
 |     _pop(PreservedGPRs); | 
 |   if (!PreservedSRegs.empty()) | 
 |     _pop(PreservedSRegs); | 
 |  | 
 |   if (!getFlags().getUseSandboxing()) | 
 |     return; | 
 |  | 
 |   // Change the original ret instruction into a sandboxed return sequence. | 
 |   // | 
 |   // bundle_lock | 
 |   // bic lr, #0xc000000f | 
 |   // bx lr | 
 |   // bundle_unlock | 
 |   // | 
 |   // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to | 
 |   // restrict to the lower 1GB as well. | 
 |   Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); | 
 |   Variable *RetValue = nullptr; | 
 |   if (RI->getSrcSize()) | 
 |     RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 
 |  | 
 |   Sandboxer(this).ret(LR, RetValue); | 
 |  | 
 |   RI->setDeleted(); | 
 | } | 
 |  | 
 | bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const { | 
 |   constexpr bool ZeroExt = false; | 
 |   return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset); | 
 | } | 
 |  | 
 | Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister( | 
 |     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) { | 
  // Legalize will likely need a movw/movt combination, but if the negated
  // offset has all of its top 16 bits clear, we can subtract it instead and
  // avoid part of that sequence.
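  // For example, with Offset == -2048 the negated value 2048 has no bits set
  // above bit 15, so a single sub of a small immediate can replace the
  // movw/movt + add sequence.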
 |   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0; | 
 |   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum); | 
 |   if (ShouldSub) { | 
 |     Operand *OffsetVal = | 
 |         Target->legalize(Target->Ctx->getConstantInt32(-Offset), | 
 |                          Legal_Reg | Legal_Flex, ScratchRegNum); | 
 |     Target->_sub(ScratchReg, Base, OffsetVal); | 
 |   } else { | 
 |     Operand *OffsetVal = | 
 |         Target->legalize(Target->Ctx->getConstantInt32(Offset), | 
 |                          Legal_Reg | Legal_Flex, ScratchRegNum); | 
 |     Target->_add(ScratchReg, Base, OffsetVal); | 
 |   } | 
 |  | 
 |   if (ScratchRegNum == Target->getReservedTmpReg()) { | 
 |     const bool BaseIsStackOrFramePtr = | 
 |         Base->getRegNum() == Target->getFrameOrStackReg(); | 
 |     // There is currently no code path that would trigger this assertion, so we | 
 |     // leave this assertion here in case it is ever violated. This is not a | 
 |     // fatal error (thus the use of assert() and not llvm::report_fatal_error) | 
 |     // as the program compiled by subzero will still work correctly. | 
 |     assert(BaseIsStackOrFramePtr); | 
 |     // Side-effect: updates TempBase to reflect the new Temporary. | 
 |     if (BaseIsStackOrFramePtr) { | 
 |       TempBaseReg = ScratchReg; | 
 |       TempBaseOffset = Offset; | 
 |     } else { | 
 |       TempBaseReg = nullptr; | 
 |       TempBaseOffset = 0; | 
 |     } | 
 |   } | 
 |  | 
 |   return ScratchReg; | 
 | } | 
 |  | 
 | OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand( | 
 |     Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) { | 
 |   assert(!Base->isRematerializable()); | 
 |   if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) { | 
 |     return OperandARM32Mem::create( | 
 |         Target->Func, Ty, Base, | 
 |         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)), | 
 |         OperandARM32Mem::Offset); | 
 |   } | 
 |  | 
 |   if (!AllowOffsets || TempBaseReg == nullptr) { | 
 |     newBaseRegister(Base, Offset, Target->getReservedTmpReg()); | 
 |   } | 
 |  | 
 |   int32_t OffsetDiff = Offset - TempBaseOffset; | 
 |   assert(AllowOffsets || OffsetDiff == 0); | 
 |  | 
 |   if (!Target->isLegalMemOffset(Ty, OffsetDiff)) { | 
 |     newBaseRegister(Base, Offset, Target->getReservedTmpReg()); | 
 |     OffsetDiff = 0; | 
 |   } | 
 |  | 
 |   assert(!TempBaseReg->isRematerializable()); | 
 |   return OperandARM32Mem::create( | 
 |       Target->Func, Ty, TempBaseReg, | 
 |       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)), | 
 |       OperandARM32Mem::Offset); | 
 | } | 
 |  | 
 | void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy( | 
 |     const Inst *Instr) { | 
 |   bool ClobbersTempBase = false; | 
 |   if (TempBaseReg != nullptr) { | 
 |     Variable *Dest = Instr->getDest(); | 
 |     if (llvm::isa<InstARM32Call>(Instr)) { | 
 |       // The following assertion is an invariant, so we remove it from the if | 
 |       // test. If the invariant is ever broken/invalidated/changed, remember | 
 |       // to add it back to the if condition. | 
 |       assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg()); | 
      // The linker may need to clobber IP if the call is too far from PC.
      // Thus, we assume IP will be overwritten.
 |       ClobbersTempBase = true; | 
 |     } else if (Dest != nullptr && | 
 |                Dest->getRegNum() == TempBaseReg->getRegNum()) { | 
 |       // Register redefinition. | 
 |       ClobbersTempBase = true; | 
 |     } | 
 |   } | 
 |  | 
 |   if (ClobbersTempBase) { | 
 |     TempBaseReg = nullptr; | 
 |     TempBaseOffset = 0; | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) { | 
 |   Variable *Dest = MovInstr->getDest(); | 
 |   assert(Dest != nullptr); | 
 |   Type DestTy = Dest->getType(); | 
 |   assert(DestTy != IceType_i64); | 
 |  | 
 |   Operand *Src = MovInstr->getSrc(0); | 
 |   Type SrcTy = Src->getType(); | 
 |   (void)SrcTy; | 
 |   assert(SrcTy != IceType_i64); | 
 |  | 
 |   if (MovInstr->isMultiDest() || MovInstr->isMultiSource()) | 
 |     return; | 
 |  | 
 |   bool Legalized = false; | 
 |   if (!Dest->hasReg()) { | 
 |     auto *SrcR = llvm::cast<Variable>(Src); | 
 |     assert(SrcR->hasReg()); | 
 |     assert(!SrcR->isRematerializable()); | 
 |     const int32_t Offset = Dest->getStackOffset(); | 
 |     // This is a _mov(Mem(), Variable), i.e., a store. | 
 |     TargetARM32::Sandboxer(Target) | 
 |         .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset), | 
 |              MovInstr->getPredicate()); | 
 |     // _str() does not have a Dest, so we add a fake-def(Dest). | 
 |     Target->Context.insert<InstFakeDef>(Dest); | 
 |     Legalized = true; | 
 |   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { | 
 |     if (Var->isRematerializable()) { | 
 |       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable). | 
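      // For example, a variable rematerialized as sp+16 becomes
      // "add Dest, sp, #16"; larger offsets are first materialized into Dest
      // with movw/movt and then added to the base.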
 |  | 
 |       // ExtraOffset is only needed for frame-pointer based frames as we have | 
 |       // to account for spill storage. | 
 |       const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg()) | 
 |                                       ? Target->getFrameFixedAllocaOffset() | 
 |                                       : 0; | 
 |  | 
 |       const int32_t Offset = Var->getStackOffset() + ExtraOffset; | 
 |       Variable *Base = Target->getPhysicalRegister(Var->getRegNum()); | 
 |       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum()); | 
 |       Target->_mov(Dest, T); | 
 |       Legalized = true; | 
 |     } else { | 
 |       if (!Var->hasReg()) { | 
 |         // This is a _mov(Variable, Mem()), i.e., a load. | 
 |         const int32_t Offset = Var->getStackOffset(); | 
 |         TargetARM32::Sandboxer(Target) | 
 |             .ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset), | 
 |                  MovInstr->getPredicate()); | 
 |         Legalized = true; | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   if (Legalized) { | 
 |     if (MovInstr->isDestRedefined()) { | 
 |       Target->_set_dest_redefined(); | 
 |     } | 
 |     MovInstr->setDeleted(); | 
 |   } | 
 | } | 
 |  | 
 | // ARM32 address modes: | 
 | //  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12], | 
 | //                    [reg +/- reg << shamt5] | 
 | //  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8] | 
 | //  ld/st vectors   : [reg] | 
 | // | 
 | // For now, we don't handle address modes with Relocatables. | 
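//
// For example, "ldr r0, [r1, #4092]" is encodable (12-bit immediate), but
// "vldr d0, [r1, #4092]" is not: f32/f64 offsets are limited to +/-1020 and
// must be a multiple of 4.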
 | namespace { | 
 | // MemTraits contains per-type valid address mode information. | 
 | #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \ | 
 |           ubits, rraddr, shaddr)                                               \ | 
 |   static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag); | 
 | ICETYPEARM32_TABLE | 
 | #undef X | 
 |  | 
 | static const struct { | 
 |   int32_t ValidImmMask; | 
 |   bool CanHaveImm; | 
 |   bool CanHaveIndex; | 
 |   bool CanHaveShiftedIndex; | 
 | } MemTraits[] = { | 
 | #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \ | 
 |           ubits, rraddr, shaddr)                                               \ | 
 |   { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, }                           \ | 
 |   , | 
 |     ICETYPEARM32_TABLE | 
 | #undef X | 
 | }; | 
 | static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits); | 
 | } // end of anonymous namespace | 
 |  | 
 | OperandARM32Mem * | 
 | TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem, | 
 |                                                        bool AllowOffsets) { | 
 |   assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable()); | 
 |   assert( | 
 |       Mem->isRegReg() || | 
 |       Target->isLegalMemOffset(Mem->getType(), Mem->getOffset()->getValue())); | 
 |  | 
 |   bool Legalized = false; | 
 |   Variable *Base = Mem->getBase(); | 
 |   int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue(); | 
 |   if (Base->isRematerializable()) { | 
 |     const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg()) | 
 |                                     ? Target->getFrameFixedAllocaOffset() | 
 |                                     : 0; | 
 |     Offset += Base->getStackOffset() + ExtraOffset; | 
 |     Base = Target->getPhysicalRegister(Base->getRegNum()); | 
 |     assert(!Base->isRematerializable()); | 
 |     Legalized = true; | 
 |   } | 
 |  | 
 |   if (!Legalized && !Target->NeedSandboxing) { | 
 |     return nullptr; | 
 |   } | 
 |  | 
 |   if (!Mem->isRegReg()) { | 
 |     return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets); | 
 |   } | 
 |  | 
 |   if (Target->NeedSandboxing) { | 
 |     llvm::report_fatal_error("Reg-Reg address mode is not allowed."); | 
 |   } | 
 |  | 
 |   assert(MemTraits[Mem->getType()].CanHaveIndex); | 
 |  | 
 |   if (Offset != 0) { | 
 |     if (TempBaseReg == nullptr) { | 
 |       Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg()); | 
 |     } else { | 
 |       uint32_t Imm8, Rotate; | 
 |       const int32_t OffsetDiff = Offset - TempBaseOffset; | 
 |       if (OffsetDiff == 0) { | 
 |         Base = TempBaseReg; | 
 |       } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) { | 
 |         auto *OffsetDiffF = OperandARM32FlexImm::create( | 
 |             Target->Func, IceType_i32, Imm8, Rotate); | 
 |         Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF); | 
 |         TempBaseOffset += OffsetDiff; | 
 |         Base = TempBaseReg; | 
 |       } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) { | 
 |         auto *OffsetDiffF = OperandARM32FlexImm::create( | 
 |             Target->Func, IceType_i32, Imm8, Rotate); | 
 |         Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF); | 
 |         TempBaseOffset += OffsetDiff; | 
 |         Base = TempBaseReg; | 
 |       } else { | 
 |         Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg()); | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   return OperandARM32Mem::create(Target->Func, Mem->getType(), Base, | 
 |                                  Mem->getIndex(), Mem->getShiftOp(), | 
 |                                  Mem->getShiftAmt(), Mem->getAddrMode()); | 
 | } | 
 |  | 
 | void TargetARM32::postLowerLegalization() { | 
 |   // If a stack variable's frame offset doesn't fit, convert from: | 
 |   //   ldr X, OFF[SP] | 
 |   // to: | 
 |   //   movw/movt TMP, OFF_PART | 
 |   //   add TMP, TMP, SP | 
 |   //   ldr X, OFF_MORE[TMP] | 
 |   // | 
 |   // This is safe because we have reserved TMP, and add for ARM does not | 
 |   // clobber the flags register. | 
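  //
  // For example (illustrative only), for a frame offset of 0x10004:
  //   movw ip, #0x0004
  //   movt ip, #0x0001
  //   add ip, ip, sp
  //   ldr X, [ip]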
 |   Func->dump("Before postLowerLegalization"); | 
 |   assert(hasComputedFrame()); | 
 |   // Do a fairly naive greedy clustering for now. Pick the first stack slot | 
 |   // that's out of bounds and make a new base reg using the architecture's temp | 
 |   // register. If that works for the next slot, then great. Otherwise, create a | 
 |   // new base register, clobbering the previous base register. Never share a | 
 |   // base reg across different basic blocks. This isn't ideal if local and | 
 |   // multi-block variables are far apart and their references are interspersed. | 
  // It may help to be more coordinated about assigning stack slot numbers, and
  // to assign smaller offsets to higher-weight variables so that they don't
  // depend on this legalization.
 |   for (CfgNode *Node : Func->getNodes()) { | 
 |     Context.init(Node); | 
 |     // One legalizer per basic block, otherwise we would share the Temporary | 
 |     // Base Register between basic blocks. | 
 |     PostLoweringLegalizer Legalizer(this); | 
 |     while (!Context.atEnd()) { | 
 |       PostIncrLoweringContext PostIncrement(Context); | 
 |       Inst *CurInstr = iteratorToInst(Context.getCur()); | 
 |  | 
 |       // Check if the previous TempBaseReg is clobbered, and reset if needed. | 
 |       Legalizer.resetTempBaseIfClobberedBy(CurInstr); | 
 |  | 
 |       if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { | 
 |         Legalizer.legalizeMov(MovInstr); | 
 |       } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) { | 
 |         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( | 
 |                 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) { | 
 |           Sandboxer(this) | 
 |               .ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate()); | 
 |           CurInstr->setDeleted(); | 
 |         } | 
 |       } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) { | 
 |         constexpr bool DisallowOffsetsBecauseLdrex = false; | 
 |         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( | 
 |                 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)), | 
 |                 DisallowOffsetsBecauseLdrex)) { | 
 |           Sandboxer(this) | 
 |               .ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate()); | 
 |           CurInstr->setDeleted(); | 
 |         } | 
 |       } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) { | 
 |         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( | 
 |                 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) { | 
 |           Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)), | 
 |                               LegalMem, StrInstr->getPredicate()); | 
 |           CurInstr->setDeleted(); | 
 |         } | 
 |       } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) { | 
 |         constexpr bool DisallowOffsetsBecauseStrex = false; | 
 |         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( | 
 |                 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)), | 
 |                 DisallowOffsetsBecauseStrex)) { | 
 |           Sandboxer(this).strex(CurInstr->getDest(), | 
 |                                 llvm::cast<Variable>(CurInstr->getSrc(0)), | 
 |                                 LegalMem, StrexInstr->getPredicate()); | 
 |           CurInstr->setDeleted(); | 
 |         } | 
 |       } | 
 |  | 
 |       // Sanity-check: the Legalizer will either have no Temp, or it will be | 
 |       // bound to IP. | 
 |       Legalizer.assertNoTempOrAssignedToIP(); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | Operand *TargetARM32::loOperand(Operand *Operand) { | 
 |   assert(Operand->getType() == IceType_i64); | 
 |   if (Operand->getType() != IceType_i64) | 
 |     return Operand; | 
 |   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) | 
 |     return Var64On32->getLo(); | 
 |   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) | 
 |     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue())); | 
 |   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { | 
 |     // Conservatively disallow memory operands with side-effects (pre/post | 
 |     // increment) in case of duplication. | 
 |     assert(Mem->getAddrMode() == OperandARM32Mem::Offset || | 
 |            Mem->getAddrMode() == OperandARM32Mem::NegOffset); | 
 |     if (Mem->isRegReg()) { | 
 |       Variable *IndexR = legalizeToReg(Mem->getIndex()); | 
 |       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR, | 
 |                                      Mem->getShiftOp(), Mem->getShiftAmt(), | 
 |                                      Mem->getAddrMode()); | 
 |     } else { | 
 |       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), | 
 |                                      Mem->getOffset(), Mem->getAddrMode()); | 
 |     } | 
 |   } | 
 |   llvm::report_fatal_error("Unsupported operand type"); | 
 |   return nullptr; | 
 | } | 
 |  | 
 | Operand *TargetARM32::hiOperand(Operand *Operand) { | 
 |   assert(Operand->getType() == IceType_i64); | 
 |   if (Operand->getType() != IceType_i64) | 
 |     return Operand; | 
 |   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) | 
 |     return Var64On32->getHi(); | 
 |   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 
 |     return Ctx->getConstantInt32( | 
 |         static_cast<uint32_t>(Const->getValue() >> 32)); | 
 |   } | 
 |   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { | 
 |     // Conservatively disallow memory operands with side-effects in case of | 
 |     // duplication. | 
 |     assert(Mem->getAddrMode() == OperandARM32Mem::Offset || | 
 |            Mem->getAddrMode() == OperandARM32Mem::NegOffset); | 
 |     const Type SplitType = IceType_i32; | 
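    // The high half always lives 4 bytes above the low half; e.g., the high
    // word of a value at [r1, #8] is loaded from [r1, #12].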
 |     if (Mem->isRegReg()) { | 
 |       // We have to make a temp variable T, and add 4 to either Base or Index. | 
 |       // The Index may be shifted, so adding 4 can mean something else. Thus, | 
 |       // prefer T := Base + 4, and use T as the new Base. | 
 |       Variable *Base = Mem->getBase(); | 
 |       Constant *Four = Ctx->getConstantInt32(4); | 
 |       Variable *NewBase = Func->makeVariable(Base->getType()); | 
 |       lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, | 
 |                                              Base, Four)); | 
 |       Variable *BaseR = legalizeToReg(NewBase); | 
 |       Variable *IndexR = legalizeToReg(Mem->getIndex()); | 
 |       return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR, | 
 |                                      Mem->getShiftOp(), Mem->getShiftAmt(), | 
 |                                      Mem->getAddrMode()); | 
 |     } else { | 
 |       Variable *Base = Mem->getBase(); | 
 |       ConstantInteger32 *Offset = Mem->getOffset(); | 
 |       assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); | 
 |       int32_t NextOffsetVal = Offset->getValue() + 4; | 
 |       constexpr bool ZeroExt = false; | 
 |       if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) { | 
 |         // We have to make a temp variable and add 4 to either Base or Offset. | 
 |         // If we add 4 to Offset, this will convert a non-RegReg addressing | 
 |         // mode into a RegReg addressing mode. Since NaCl sandboxing disallows | 
 |         // RegReg addressing modes, prefer adding to base and replacing | 
 |         // instead. Thus we leave the old offset alone. | 
 |         Constant *_4 = Ctx->getConstantInt32(4); | 
 |         Variable *NewBase = Func->makeVariable(Base->getType()); | 
 |         lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, | 
 |                                                NewBase, Base, _4)); | 
 |         Base = NewBase; | 
 |       } else { | 
 |         Offset = | 
 |             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); | 
 |       } | 
 |       Variable *BaseR = legalizeToReg(Base); | 
 |       return OperandARM32Mem::create(Func, SplitType, BaseR, Offset, | 
 |                                      Mem->getAddrMode()); | 
 |     } | 
 |   } | 
 |   llvm::report_fatal_error("Unsupported operand type"); | 
 |   return nullptr; | 
 | } | 
 |  | 
 | SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, | 
 |                                            RegSetMask Exclude) const { | 
 |   SmallBitVector Registers(RegARM32::Reg_NUM); | 
 |  | 
 |   for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) { | 
 |     const auto &Entry = RegARM32::RegTable[i]; | 
 |     if (Entry.Scratch && (Include & RegSet_CallerSave)) | 
 |       Registers[i] = true; | 
 |     if (Entry.Preserved && (Include & RegSet_CalleeSave)) | 
 |       Registers[i] = true; | 
 |     if (Entry.StackPtr && (Include & RegSet_StackPointer)) | 
 |       Registers[i] = true; | 
 |     if (Entry.FramePtr && (Include & RegSet_FramePointer)) | 
 |       Registers[i] = true; | 
 |     if (Entry.Scratch && (Exclude & RegSet_CallerSave)) | 
 |       Registers[i] = false; | 
 |     if (Entry.Preserved && (Exclude & RegSet_CalleeSave)) | 
 |       Registers[i] = false; | 
 |     if (Entry.StackPtr && (Exclude & RegSet_StackPointer)) | 
 |       Registers[i] = false; | 
 |     if (Entry.FramePtr && (Exclude & RegSet_FramePointer)) | 
 |       Registers[i] = false; | 
 |   } | 
 |  | 
 |   return Registers; | 
 | } | 
 |  | 
 | void TargetARM32::lowerAlloca(const InstAlloca *Instr) { | 
 |   // Conservatively require the stack to be aligned. Some stack adjustment | 
 |   // operations implemented below assume that the stack is aligned before the | 
 |   // alloca. All the alloca code ensures that the stack alignment is preserved | 
 |   // after the alloca. The stack alignment restriction can be relaxed in some | 
 |   // cases. | 
 |   NeedsStackAlignment = true; | 
 |  | 
 |   // For default align=0, set it to the real value 1, to avoid any | 
 |   // bit-manipulation problems below. | 
 |   const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); | 
 |  | 
 |   // LLVM enforces power of 2 alignment. | 
 |   assert(llvm::isPowerOf2_32(AlignmentParam)); | 
 |   assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); | 
 |  | 
 |   const uint32_t Alignment = | 
 |       std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); | 
 |   const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES; | 
 |   const bool OptM1 = Func->getOptLevel() == Opt_m1; | 
 |   const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); | 
 |   const bool UseFramePointer = | 
 |       hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | 
 |  | 
 |   if (UseFramePointer) | 
 |     setHasFramePointer(); | 
 |  | 
 |   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 
 |   if (OverAligned) { | 
 |     Sandboxer(this).align_sp(Alignment); | 
 |   } | 
 |  | 
 |   Variable *Dest = Instr->getDest(); | 
 |   Operand *TotalSize = Instr->getSizeInBytes(); | 
 |  | 
 |   if (const auto *ConstantTotalSize = | 
 |           llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 
 |     const uint32_t Value = | 
 |         Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); | 
 |     // Constant size alloca. | 
 |     if (!UseFramePointer) { | 
      // If we don't need a Frame Pointer, this alloca has a known offset from
      // the stack pointer. We don't need to adjust the stack pointer, nor
      // assign any value to Dest, as Dest is rematerializable.
 |       assert(Dest->isRematerializable()); | 
 |       FixedAllocaSizeBytes += Value; | 
 |       Context.insert<InstFakeDef>(Dest); | 
 |       return; | 
 |     } | 
 |  | 
    // If a frame pointer is required, then we need to store the alloca'd
    // result in Dest.
 |     Operand *SubAmountRF = | 
 |         legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex); | 
 |     Sandboxer(this).sub_sp(SubAmountRF); | 
 |   } else { | 
 |     // Non-constant sizes need to be adjusted to the next highest multiple of | 
 |     // the required alignment at runtime. | 
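    // For example, with a 16-byte alignment a runtime size of 20 becomes 35
    // after adding Alignment - 1, and 32 after alignRegisterPow2 masks off
    // the low bits.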
 |     TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _mov(T, TotalSize); | 
 |     Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); | 
 |     _add(T, T, AddAmount); | 
 |     alignRegisterPow2(T, Alignment); | 
 |     Sandboxer(this).sub_sp(T); | 
 |   } | 
 |  | 
  // Add the out args area back: the alloca'd memory starts MaxOutArgsSizeBytes
  // above SP, since the out args area sits between SP and the allocation.
 |   Variable *T = SP; | 
 |   if (MaxOutArgsSizeBytes != 0) { | 
 |     T = makeReg(getPointerType()); | 
 |     Operand *OutArgsSizeRF = legalize( | 
 |         Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex); | 
 |     _add(T, SP, OutArgsSizeRF); | 
 |   } | 
 |  | 
 |   _mov(Dest, T); | 
 | } | 
 |  | 
 | void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { | 
 |   if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) | 
 |     return; | 
 |   Variable *SrcLoReg = legalizeToReg(SrcLo); | 
 |   switch (Ty) { | 
 |   default: | 
 |     llvm_unreachable( | 
 |         ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str()); | 
 |   case IceType_i8: | 
 |   case IceType_i16: { | 
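    // Shifting the sub-word divisor into the top bits sets Z iff its low
    // 8/16 bits are all zero; e.g. "lsls t, src, #24" for an i8 divisor.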
 |     Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty)); | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _lsls(T, SrcLoReg, ShAmtImm); | 
 |     Context.insert<InstFakeUse>(T); | 
 |   } break; | 
 |   case IceType_i32: { | 
 |     _tst(SrcLoReg, SrcLoReg); | 
 |     break; | 
 |   } | 
 |   case IceType_i64: { | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); | 
 |     // T isn't going to be used, but we need the side-effect of setting flags | 
 |     // from this operation. | 
 |     Context.insert<InstFakeUse>(T); | 
 |   } | 
 |   } | 
 |   auto *Label = InstARM32Label::create(Func, this); | 
 |   _br(Label, CondARM32::NE); | 
 |   _trap(); | 
 |   Context.insert(Label); | 
 | } | 
 |  | 
 | void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 
 |                                Operand *Src1, ExtInstr ExtFunc, | 
 |                                DivInstr DivFunc, bool IsRemainder) { | 
 |   div0Check(Dest->getType(), Src1, nullptr); | 
 |   Variable *Src1R = legalizeToReg(Src1); | 
 |   Variable *T0R = Src0R; | 
 |   Variable *T1R = Src1R; | 
 |   if (Dest->getType() != IceType_i32) { | 
 |     T0R = makeReg(IceType_i32); | 
 |     (this->*ExtFunc)(T0R, Src0R, CondARM32::AL); | 
 |     T1R = makeReg(IceType_i32); | 
 |     (this->*ExtFunc)(T1R, Src1R, CondARM32::AL); | 
 |   } | 
 |   if (hasCPUFeature(TargetARM32Features::HWDivArm)) { | 
 |     (this->*DivFunc)(T, T0R, T1R, CondARM32::AL); | 
 |     if (IsRemainder) { | 
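      // Compute the remainder as dividend - quotient * divisor with a single
      // mls: T2 = T0R - T * T1R.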
 |       Variable *T2 = makeReg(IceType_i32); | 
 |       _mls(T2, T, T1R, T0R); | 
 |       T = T2; | 
 |     } | 
 |     _mov(Dest, T); | 
 |   } else { | 
 |     llvm::report_fatal_error("div should have already been turned into a call"); | 
 |   } | 
 | } | 
 |  | 
 | TargetARM32::SafeBoolChain | 
 | TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   assert(Dest->getType() == IceType_i1); | 
 |  | 
 |   // So folding didn't work for Instr. Not a problem: We just need to | 
 |   // materialize the Sources, and perform the operation. We create regular | 
 |   // Variables (and not infinite-weight ones) because this call might recurse a | 
 |   // lot, and we might end up with tons of infinite weight temporaries. | 
 |   assert(Instr->getSrcSize() == 2); | 
 |   Variable *Src0 = Func->makeVariable(IceType_i1); | 
 |   SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0)); | 
 |  | 
 |   Operand *Src1 = Instr->getSrc(1); | 
 |   SafeBoolChain Src1Safe = SBC_Yes; | 
 |  | 
 |   if (!llvm::isa<Constant>(Src1)) { | 
 |     Variable *Src1V = Func->makeVariable(IceType_i1); | 
 |     Src1Safe = lowerInt1(Src1V, Src1); | 
 |     Src1 = Src1V; | 
 |   } | 
 |  | 
 |   Variable *T = makeReg(IceType_i1); | 
 |   Src0 = legalizeToReg(Src0); | 
 |   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 
 |   switch (Instr->getOp()) { | 
 |   default: | 
    // If this fatal error is ever hit, add the offending operation to the
    // list of valid consumers.
 |     llvm::report_fatal_error("Unhandled i1 Op"); | 
 |   case InstArithmetic::And: | 
 |     _and(T, Src0, Src1RF); | 
 |     break; | 
 |   case InstArithmetic::Or: | 
 |     _orr(T, Src0, Src1RF); | 
 |     break; | 
 |   case InstArithmetic::Xor: | 
 |     _eor(T, Src0, Src1RF); | 
 |     break; | 
 |   } | 
 |   _mov(Dest, T); | 
 |   return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; | 
 | } | 
 |  | 
 | namespace { | 
// NumericOperands is used during arithmetic/icmp lowering for constant folding.
// It holds the two source operands, and maintains some state as to whether one
// of them is a constant. If one of the operands is a constant, then it will be
// stored as the operation's second source, with a bit indicating whether the
// operands were swapped.
//
// The class is split into a base class with operand type-independent methods,
// and a derived, templated class, for each type of operand we want to fold
// constants for:
//
// NumericOperandsBase --> NumericOperands<ConstantFloat>
//                     --> NumericOperands<ConstantDouble>
//                     --> NumericOperands<ConstantInteger32>
//
// NumericOperands<ConstantInteger32> also exposes helper methods for emitting
// inverted/negated immediates.
 | class NumericOperandsBase { | 
 |   NumericOperandsBase() = delete; | 
 |   NumericOperandsBase(const NumericOperandsBase &) = delete; | 
 |   NumericOperandsBase &operator=(const NumericOperandsBase &) = delete; | 
 |  | 
 | public: | 
 |   NumericOperandsBase(Operand *S0, Operand *S1) | 
 |       : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)), | 
 |         Swapped(Src0 == S1 && S0 != S1) { | 
 |     assert(Src0 != nullptr); | 
 |     assert(Src1 != nullptr); | 
 |     assert(Src0 != Src1 || S0 == S1); | 
 |   } | 
 |  | 
 |   bool hasConstOperand() const { | 
 |     return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1); | 
 |   } | 
 |  | 
 |   bool swappedOperands() const { return Swapped; } | 
 |  | 
 |   Variable *src0R(TargetARM32 *Target) const { | 
 |     return legalizeToReg(Target, Src0); | 
 |   } | 
 |  | 
 |   Variable *unswappedSrc0R(TargetARM32 *Target) const { | 
 |     return legalizeToReg(Target, Swapped ? Src1 : Src0); | 
 |   } | 
 |  | 
 |   Operand *src1RF(TargetARM32 *Target) const { | 
 |     return legalizeToRegOrFlex(Target, Src1); | 
 |   } | 
 |  | 
 |   Variable *unswappedSrc1R(TargetARM32 *Target) const { | 
 |     return legalizeToReg(Target, Swapped ? Src0 : Src1); | 
 |   } | 
 |  | 
 |   Operand *src1() const { return Src1; } | 
 |  | 
 | protected: | 
 |   Operand *const Src0; | 
 |   Operand *const Src1; | 
 |   const bool Swapped; | 
 |  | 
 |   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) { | 
 |     return Target->legalizeToReg(Src); | 
 |   } | 
 |  | 
 |   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) { | 
 |     return Target->legalize(Src, | 
 |                             TargetARM32::Legal_Reg | TargetARM32::Legal_Flex); | 
 |   } | 
 |  | 
 | private: | 
 |   static Operand *NonConstOperand(Operand *S0, Operand *S1) { | 
 |     if (!llvm::isa<Constant>(S0)) | 
 |       return S0; | 
 |     if (!llvm::isa<Constant>(S1)) | 
 |       return S1; | 
 |     if (llvm::isa<ConstantRelocatable>(S1) && | 
 |         !llvm::isa<ConstantRelocatable>(S0)) | 
 |       return S1; | 
 |     return S0; | 
 |   } | 
 |  | 
 |   static Operand *ConstOperand(Operand *S0, Operand *S1) { | 
 |     if (!llvm::isa<Constant>(S0)) | 
 |       return S1; | 
 |     if (!llvm::isa<Constant>(S1)) | 
 |       return S0; | 
 |     if (llvm::isa<ConstantRelocatable>(S1) && | 
 |         !llvm::isa<ConstantRelocatable>(S0)) | 
 |       return S0; | 
 |     return S1; | 
 |   } | 
 | }; | 
 |  | 
 | template <typename C> class NumericOperands : public NumericOperandsBase { | 
 |   NumericOperands() = delete; | 
 |   NumericOperands(const NumericOperands &) = delete; | 
 |   NumericOperands &operator=(const NumericOperands &) = delete; | 
 |  | 
 | public: | 
 |   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) { | 
 |     assert(!hasConstOperand() || llvm::isa<C>(this->Src1)); | 
 |   } | 
 |  | 
 |   typename C::PrimType getConstantValue() const { | 
 |     return llvm::cast<C>(Src1)->getValue(); | 
 |   } | 
 | }; | 
 |  | 
 | using FloatOperands = NumericOperands<ConstantFloat>; | 
 | using DoubleOperands = NumericOperands<ConstantDouble>; | 
 |  | 
 | class Int32Operands : public NumericOperands<ConstantInteger32> { | 
 |   Int32Operands() = delete; | 
 |   Int32Operands(const Int32Operands &) = delete; | 
 |   Int32Operands &operator=(const Int32Operands &) = delete; | 
 |  | 
 | public: | 
 |   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {} | 
 |  | 
 |   Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const { | 
 |     if (!swappedOperands() && hasConstOperand()) { | 
 |       return Target->shAmtImm(getConstantValue() & 0x1F); | 
 |     } | 
 |     return legalizeToReg(Target, Swapped ? Src0 : Src1); | 
 |   } | 
 |  | 
 |   bool isSrc1ImmediateZero() const { | 
 |     if (!swappedOperands() && hasConstOperand()) { | 
 |       return getConstantValue() == 0; | 
 |     } | 
 |     return false; | 
 |   } | 
 |  | 
 |   bool immediateIsFlexEncodable() const { | 
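    // An ARM flex immediate is an 8-bit value rotated right by an even
    // amount; e.g. 0xFF000000 (0xFF ror 8) is encodable, while 0x101 is not.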
 |     uint32_t Rotate, Imm8; | 
 |     return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8); | 
 |   } | 
 |  | 
 |   bool negatedImmediateIsFlexEncodable() const { | 
 |     uint32_t Rotate, Imm8; | 
 |     return OperandARM32FlexImm::canHoldImm( | 
 |         -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8); | 
 |   } | 
 |  | 
 |   Operand *negatedSrc1F(TargetARM32 *Target) const { | 
 |     return legalizeToRegOrFlex(Target, | 
 |                                Target->getCtx()->getConstantInt32( | 
 |                                    -static_cast<int32_t>(getConstantValue()))); | 
 |   } | 
 |  | 
 |   bool invertedImmediateIsFlexEncodable() const { | 
 |     uint32_t Rotate, Imm8; | 
 |     return OperandARM32FlexImm::canHoldImm( | 
 |         ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8); | 
 |   } | 
 |  | 
 |   Operand *invertedSrc1F(TargetARM32 *Target) const { | 
 |     return legalizeToRegOrFlex(Target, | 
 |                                Target->getCtx()->getConstantInt32( | 
 |                                    ~static_cast<uint32_t>(getConstantValue()))); | 
 |   } | 
 | }; | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::preambleDivRem(const InstCall *Instr) { | 
 |   Operand *Src1 = Instr->getArg(1); | 
 |  | 
 |   switch (Src1->getType()) { | 
 |   default: | 
 |     llvm::report_fatal_error("Invalid type for idiv."); | 
 |   case IceType_i64: { | 
 |     if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) { | 
 |       if (C->getValue() == 0) { | 
 |         _trap(); | 
 |         return; | 
 |       } | 
 |     } | 
 |     div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1)); | 
 |     return; | 
 |   } | 
 |   case IceType_i32: { | 
 |     // Src0 and Src1 have already been appropriately extended to an i32, so we | 
 |     // don't check for i8 and i16. | 
 |     if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 
 |       if (C->getValue() == 0) { | 
 |         _trap(); | 
 |         return; | 
 |       } | 
 |     } | 
 |     div0Check(IceType_i32, Src1, nullptr); | 
 |     return; | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, | 
 |                                        Variable *Dest, Operand *Src0, | 
 |                                        Operand *Src1) { | 
 |   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); | 
 |   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); | 
 |   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); | 
 |   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); | 
 |  | 
 |   auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
 |   auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
 |   Variable *T_Lo = makeReg(DestLo->getType()); | 
 |   Variable *T_Hi = makeReg(DestHi->getType()); | 
 |  | 
 |   switch (Op) { | 
 |   case InstArithmetic::_num: | 
 |     llvm::report_fatal_error("Unknown arithmetic operator"); | 
 |     return; | 
 |   case InstArithmetic::Add: { | 
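    // adds sets the carry flag from the low-word addition; adc then adds the
    // high words plus that carry.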
 |     Variable *Src0LoR = SrcsLo.src0R(this); | 
 |     Operand *Src1LoRF = SrcsLo.src1RF(this); | 
 |     Variable *Src0HiR = SrcsHi.src0R(this); | 
 |     Operand *Src1HiRF = SrcsHi.src1RF(this); | 
 |     _adds(T_Lo, Src0LoR, Src1LoRF); | 
 |     _mov(DestLo, T_Lo); | 
 |     _adc(T_Hi, Src0HiR, Src1HiRF); | 
 |     _mov(DestHi, T_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::And: { | 
 |     Variable *Src0LoR = SrcsLo.src0R(this); | 
 |     Operand *Src1LoRF = SrcsLo.src1RF(this); | 
 |     Variable *Src0HiR = SrcsHi.src0R(this); | 
 |     Operand *Src1HiRF = SrcsHi.src1RF(this); | 
 |     _and(T_Lo, Src0LoR, Src1LoRF); | 
 |     _mov(DestLo, T_Lo); | 
 |     _and(T_Hi, Src0HiR, Src1HiRF); | 
 |     _mov(DestHi, T_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Or: { | 
 |     Variable *Src0LoR = SrcsLo.src0R(this); | 
 |     Operand *Src1LoRF = SrcsLo.src1RF(this); | 
 |     Variable *Src0HiR = SrcsHi.src0R(this); | 
 |     Operand *Src1HiRF = SrcsHi.src1RF(this); | 
 |     _orr(T_Lo, Src0LoR, Src1LoRF); | 
 |     _mov(DestLo, T_Lo); | 
 |     _orr(T_Hi, Src0HiR, Src1HiRF); | 
 |     _mov(DestHi, T_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Xor: { | 
 |     Variable *Src0LoR = SrcsLo.src0R(this); | 
 |     Operand *Src1LoRF = SrcsLo.src1RF(this); | 
 |     Variable *Src0HiR = SrcsHi.src0R(this); | 
 |     Operand *Src1HiRF = SrcsHi.src1RF(this); | 
 |     _eor(T_Lo, Src0LoR, Src1LoRF); | 
 |     _mov(DestLo, T_Lo); | 
 |     _eor(T_Hi, Src0HiR, Src1HiRF); | 
 |     _mov(DestHi, T_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Sub: { | 
 |     Variable *Src0LoR = SrcsLo.src0R(this); | 
 |     Operand *Src1LoRF = SrcsLo.src1RF(this); | 
 |     Variable *Src0HiR = SrcsHi.src0R(this); | 
 |     Operand *Src1HiRF = SrcsHi.src1RF(this); | 
 |     if (SrcsLo.swappedOperands()) { | 
 |       _rsbs(T_Lo, Src0LoR, Src1LoRF); | 
 |       _mov(DestLo, T_Lo); | 
 |       _rsc(T_Hi, Src0HiR, Src1HiRF); | 
 |       _mov(DestHi, T_Hi); | 
 |     } else { | 
 |       _subs(T_Lo, Src0LoR, Src1LoRF); | 
 |       _mov(DestLo, T_Lo); | 
 |       _sbc(T_Hi, Src0HiR, Src1HiRF); | 
 |       _mov(DestHi, T_Hi); | 
 |     } | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Mul: { | 
 |     // GCC 4.8 does: | 
 |     // a=b*c ==> | 
 |     //   t_acc =(mul) (b.lo * c.hi) | 
 |     //   t_acc =(mla) (c.lo * b.hi) + t_acc | 
 |     //   t.hi,t.lo =(umull) b.lo * c.lo | 
 |     //   t.hi += t_acc | 
 |     //   a.lo = t.lo | 
 |     //   a.hi = t.hi | 
 |     // | 
 |     // LLVM does: | 
 |     //   t.hi,t.lo =(umull) b.lo * c.lo | 
 |     //   t.hi =(mla) (b.lo * c.hi) + t.hi | 
 |     //   t.hi =(mla) (b.hi * c.lo) + t.hi | 
 |     //   a.lo = t.lo | 
 |     //   a.hi = t.hi | 
 |     // | 
 |     // LLVM's lowering has fewer instructions, but more register pressure: | 
 |     // t.lo is live from beginning to end, while GCC delays the two-dest | 
 |     // instruction till the end, and kills c.hi immediately. | 
 |     Variable *T_Acc = makeReg(IceType_i32); | 
 |     Variable *T_Acc1 = makeReg(IceType_i32); | 
 |     Variable *T_Hi1 = makeReg(IceType_i32); | 
 |     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); | 
 |     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); | 
 |     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); | 
 |     Variable *Src1RHi = SrcsHi.unswappedSrc1R(this); | 
 |     _mul(T_Acc, Src0RLo, Src1RHi); | 
 |     _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | 
 |     _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | 
 |     _add(T_Hi, T_Hi1, T_Acc1); | 
 |     _mov(DestLo, T_Lo); | 
 |     _mov(DestHi, T_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Shl: { | 
 |     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { | 
 |       Variable *Src0RLo = SrcsLo.src0R(this); | 
 |       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | 
 |       const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; | 
 |       if (ShAmtImm == 0) { | 
 |         _mov(DestLo, Src0RLo); | 
 |         _mov(DestHi, SrcsHi.src0R(this)); | 
 |         return; | 
 |       } | 
 |  | 
 |       if (ShAmtImm >= 32) { | 
 |         if (ShAmtImm == 32) { | 
 |           _mov(DestHi, Src0RLo); | 
 |         } else { | 
 |           Operand *ShAmtOp = shAmtImm(ShAmtImm - 32); | 
 |           _lsl(T_Hi, Src0RLo, ShAmtOp); | 
 |           _mov(DestHi, T_Hi); | 
 |         } | 
 |  | 
 |         Operand *_0 = | 
 |             legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |         _mov(T_Lo, _0); | 
 |         _mov(DestLo, T_Lo); | 
 |         return; | 
 |       } | 
 |  | 
 |       Variable *Src0RHi = SrcsHi.src0R(this); | 
 |       Operand *ShAmtOp = shAmtImm(ShAmtImm); | 
 |       Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm); | 
 |       _lsl(T_Hi, Src0RHi, ShAmtOp); | 
 |       _orr(T_Hi, T_Hi, | 
 |            OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 
 |                                        OperandARM32::LSR, ComplShAmtOp)); | 
 |       _mov(DestHi, T_Hi); | 
 |  | 
 |       _lsl(T_Lo, Src0RLo, ShAmtOp); | 
 |       _mov(DestLo, T_Lo); | 
 |       return; | 
 |     } | 
 |  | 
 |     // a=b<<c ==> | 
 |     // pnacl-llc does: | 
 |     // mov     t_b.lo, b.lo | 
 |     // mov     t_b.hi, b.hi | 
 |     // mov     t_c.lo, c.lo | 
 |     // rsb     T0, t_c.lo, #32 | 
 |     // lsr     T1, t_b.lo, T0 | 
 |     // orr     t_a.hi, T1, t_b.hi, lsl t_c.lo | 
 |     // sub     T2, t_c.lo, #32 | 
 |     // cmp     T2, #0 | 
 |     // lslge   t_a.hi, t_b.lo, T2 | 
 |     // lsl     t_a.lo, t_b.lo, t_c.lo | 
 |     // mov     a.lo, t_a.lo | 
 |     // mov     a.hi, t_a.hi | 
 |     // | 
 |     // GCC 4.8 does: | 
 |     // sub t_c1, c.lo, #32 | 
 |     // lsl t_hi, b.hi, c.lo | 
 |     // orr t_hi, t_hi, b.lo, lsl t_c1 | 
 |     // rsb t_c2, c.lo, #32 | 
 |     // orr t_hi, t_hi, b.lo, lsr t_c2 | 
 |     // lsl t_lo, b.lo, c.lo | 
 |     // a.lo = t_lo | 
 |     // a.hi = t_hi | 
 |     // | 
    // These are incompatible; therefore, we mimic pnacl-llc.
    // This could be strength-reduced for constant shifts, but we don't do
    // that for now.
 |     // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | 
 |     // ARM, shifts only take the lower 8 bits of the shift register, and | 
 |     // saturate to the range 0-32, so the negative value will saturate to 32. | 
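    // For example, with c.lo == 40: T0 == -8, whose bottom byte is 248, so
    // the lsr contributes 0; T2 == 8 >= 0, so t_a.hi gets redefined to
    // t_b.lo << 8; and t_a.lo == t_b.lo << 40 == 0, which is exactly the
    // result of a 64-bit shift left by 40.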
 |     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 
 |     Operand *_0 = | 
 |         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |     Variable *T0 = makeReg(IceType_i32); | 
 |     Variable *T1 = makeReg(IceType_i32); | 
 |     Variable *T2 = makeReg(IceType_i32); | 
 |     Variable *TA_Hi = makeReg(IceType_i32); | 
 |     Variable *TA_Lo = makeReg(IceType_i32); | 
 |     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); | 
 |     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); | 
 |     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); | 
 |     _rsb(T0, Src1RLo, _32); | 
 |     _lsr(T1, Src0RLo, T0); | 
 |     _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 
 |                                                 OperandARM32::LSL, Src1RLo)); | 
 |     _sub(T2, Src1RLo, _32); | 
 |     _cmp(T2, _0); | 
 |     _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | 
 |     _set_dest_redefined(); | 
 |     _lsl(TA_Lo, Src0RLo, Src1RLo); | 
 |     _mov(DestLo, TA_Lo); | 
 |     _mov(DestHi, TA_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Lshr: | 
 |   case InstArithmetic::Ashr: { | 
 |     const bool ASR = Op == InstArithmetic::Ashr; | 
 |     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { | 
 |       Variable *Src0RHi = SrcsHi.src0R(this); | 
 |       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | 
 |       const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F; | 
 |       if (ShAmt == 0) { | 
 |         _mov(DestHi, Src0RHi); | 
 |         _mov(DestLo, SrcsLo.src0R(this)); | 
 |         return; | 
 |       } | 
 |  | 
 |       if (ShAmt >= 32) { | 
 |         if (ShAmt == 32) { | 
 |           _mov(DestLo, Src0RHi); | 
 |         } else { | 
 |           Operand *ShAmtImm = shAmtImm(ShAmt - 32); | 
 |           if (ASR) { | 
 |             _asr(T_Lo, Src0RHi, ShAmtImm); | 
 |           } else { | 
 |             _lsr(T_Lo, Src0RHi, ShAmtImm); | 
 |           } | 
 |           _mov(DestLo, T_Lo); | 
 |         } | 
 |  | 
 |         if (ASR) { | 
 |           Operand *_31 = shAmtImm(31); | 
 |           _asr(T_Hi, Src0RHi, _31); | 
 |         } else { | 
 |           Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), | 
 |                                  Legal_Reg | Legal_Flex); | 
 |           _mov(T_Hi, _0); | 
 |         } | 
 |         _mov(DestHi, T_Hi); | 
 |         return; | 
 |       } | 
 |  | 
 |       Variable *Src0RLo = SrcsLo.src0R(this); | 
 |       Operand *ShAmtImm = shAmtImm(ShAmt); | 
 |       Operand *ComplShAmtImm = shAmtImm(32 - ShAmt); | 
 |       _lsr(T_Lo, Src0RLo, ShAmtImm); | 
 |       _orr(T_Lo, T_Lo, | 
 |            OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 
 |                                        OperandARM32::LSL, ComplShAmtImm)); | 
 |       _mov(DestLo, T_Lo); | 
 |  | 
 |       if (ASR) { | 
 |         _asr(T_Hi, Src0RHi, ShAmtImm); | 
 |       } else { | 
 |         _lsr(T_Hi, Src0RHi, ShAmtImm); | 
 |       } | 
 |       _mov(DestHi, T_Hi); | 
 |       return; | 
 |     } | 
 |  | 
 |     // a=b>>c | 
 |     // pnacl-llc does: | 
 |     // mov        t_b.lo, b.lo | 
 |     // mov        t_b.hi, b.hi | 
 |     // mov        t_c.lo, c.lo | 
 |     // lsr        T0, t_b.lo, t_c.lo | 
 |     // rsb        T1, t_c.lo, #32 | 
 |     // orr        t_a.lo, T0, t_b.hi, lsl T1 | 
 |     // sub        T2, t_c.lo, #32 | 
 |     // cmp        T2, #0 | 
 |     // [al]srge   t_a.lo, t_b.hi, T2 | 
 |     // [al]sr     t_a.hi, t_b.hi, t_c.lo | 
 |     // mov        a.lo, t_a.lo | 
 |     // mov        a.hi, t_a.hi | 
 |     // | 
 |     // GCC 4.8 does (lsr): | 
 |     // rsb        t_c1, c.lo, #32 | 
 |     // lsr        t_lo, b.lo, c.lo | 
 |     // orr        t_lo, t_lo, b.hi, lsl t_c1 | 
 |     // sub        t_c2, c.lo, #32 | 
 |     // orr        t_lo, t_lo, b.hi, lsr t_c2 | 
 |     // lsr        t_hi, b.hi, c.lo | 
 |     // mov        a.lo, t_lo | 
 |     // mov        a.hi, t_hi | 
 |     // | 
    // These are incompatible; therefore, we mimic pnacl-llc.
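    // For example, with c.lo == 40: T0 == t_b.lo >> 40 == 0 and T1 == -8, so
    // the orr contributes nothing; T2 == 8 >= 0, so t_a.lo gets redefined to
    // t_b.hi shifted right by 8; and t_a.hi == t_b.hi shifted right by 40,
    // which is 0 for lsr and all sign bits for asr, as expected for a 64-bit
    // shift right by 40.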
 |     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 
 |     Operand *_0 = | 
 |         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |     Variable *T0 = makeReg(IceType_i32); | 
 |     Variable *T1 = makeReg(IceType_i32); | 
 |     Variable *T2 = makeReg(IceType_i32); | 
 |     Variable *TA_Lo = makeReg(IceType_i32); | 
 |     Variable *TA_Hi = makeReg(IceType_i32); | 
 |     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); | 
 |     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); | 
 |     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); | 
 |     _lsr(T0, Src0RLo, Src1RLo); | 
 |     _rsb(T1, Src1RLo, _32); | 
 |     _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 
 |                                                 OperandARM32::LSL, T1)); | 
 |     _sub(T2, Src1RLo, _32); | 
 |     _cmp(T2, _0); | 
 |     if (ASR) { | 
 |       _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | 
 |       _set_dest_redefined(); | 
 |       _asr(TA_Hi, Src0RHi, Src1RLo); | 
 |     } else { | 
 |       _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | 
 |       _set_dest_redefined(); | 
 |       _lsr(TA_Hi, Src0RHi, Src1RLo); | 
 |     } | 
 |     _mov(DestLo, TA_Lo); | 
 |     _mov(DestHi, TA_Hi); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Fadd: | 
 |   case InstArithmetic::Fsub: | 
 |   case InstArithmetic::Fmul: | 
 |   case InstArithmetic::Fdiv: | 
 |   case InstArithmetic::Frem: | 
 |     llvm::report_fatal_error("FP instruction with i64 type"); | 
 |     return; | 
 |   case InstArithmetic::Udiv: | 
 |   case InstArithmetic::Sdiv: | 
 |   case InstArithmetic::Urem: | 
 |   case InstArithmetic::Srem: | 
 |     llvm::report_fatal_error("Call-helper-involved instruction for i64 type " | 
 |                              "should have already been handled before"); | 
 |     return; | 
 |   } | 
 | } | 
 |  | 
 | namespace { | 
 | // StrengthReduction is a namespace with the strength reduction machinery. The | 
 | // entry point is the StrengthReduction::tryToOptimize method. It returns true | 
 | // if the optimization can be performed, and false otherwise. | 
 | // | 
 | // If the optimization can be performed, tryToOptimize sets its NumOperations | 
 | // parameter to the number of shifts that are needed to perform the | 
 | // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub> | 
 | // tuples that describe how to materialize the multiplication. | 
 | // | 
// The algorithm finds runs of contiguous 1s in the multiplication source, and
// uses one or two shifts to materialize each run. A sequence of 1s, e.g.,
 | // | 
 | //                  M           N | 
 | //   ...00000000000011111...111110000000... | 
 | // | 
 | // is materializable with (1 << (M + 1)) - (1 << N): | 
 | // | 
 | //   ...00000000000100000...000000000000...      [1 << (M + 1)] | 
 | //   ...00000000000000000...000010000000... (-)  [1 << N] | 
 | //   -------------------------------------- | 
 | //   ...00000000000011111...111110000000... | 
 | // | 
// A single bit set is materialized with just a left shift.
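//
// For example, 14 == 0b1110 is a run from bit 3 down to bit 1, so x * 14 is
// materialized as (x << 4) - (x << 1); 10 == 0b1010 has two isolated 1s and
// becomes (x << 3) + (x << 1).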
 | namespace StrengthReduction { | 
 | enum AggregationOperation { | 
 |   AO_Invalid, | 
 |   AO_Add, | 
 |   AO_Sub, | 
 | }; | 
 |  | 
// AggregationElement is a glorified <ShAmt, AddOrSub> tuple.
 | class AggregationElement { | 
 |   AggregationElement(const AggregationElement &) = delete; | 
 |  | 
 | public: | 
 |   AggregationElement() = default; | 
 |   AggregationElement &operator=(const AggregationElement &) = default; | 
 |   AggregationElement(AggregationOperation Op, uint32_t ShAmt) | 
 |       : Op(Op), ShAmt(ShAmt) {} | 
 |  | 
 |   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const { | 
 |     assert(OpR->mustHaveReg()); | 
 |     if (ShAmt == 0) { | 
 |       return OpR; | 
 |     } | 
 |     return OperandARM32FlexReg::create( | 
 |         Func, IceType_i32, OpR, OperandARM32::LSL, | 
 |         OperandARM32ShAmtImm::create( | 
 |             Func, llvm::cast<ConstantInteger32>( | 
 |                       Func->getContext()->getConstantInt32(ShAmt)))); | 
 |   } | 
 |  | 
 |   bool aggregateWithAdd() const { | 
 |     switch (Op) { | 
 |     case AO_Invalid: | 
 |       llvm::report_fatal_error("Invalid Strength Reduction Operations."); | 
 |     case AO_Add: | 
 |       return true; | 
 |     case AO_Sub: | 
 |       return false; | 
 |     } | 
 |     llvm_unreachable("(silence g++ warning)"); | 
 |   } | 
 |  | 
 |   uint32_t shAmt() const { return ShAmt; } | 
 |  | 
 | private: | 
 |   AggregationOperation Op = AO_Invalid; | 
 |   uint32_t ShAmt; | 
 | }; | 
 |  | 
 | // [RangeStart, RangeEnd] is a range of 1s in Src. | 
 | template <std::size_t N> | 
 | bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations, | 
 |                    std::array<AggregationElement, N> *Operations) { | 
 |   assert(*NumOperations < N); | 
 |   if (RangeStart == RangeEnd) { | 
 |     // Single bit set: | 
 |     // Src           : 0...00010... | 
 |     // RangeStart    :        ^ | 
 |     // RangeEnd      :        ^ | 
 |     // NegSrc        : 0...00001... | 
 |     (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart); | 
 |     ++(*NumOperations); | 
 |     return true; | 
 |   } | 
 |  | 
  // Sequence of 1s (two operations required):
 |   // Src           : 0...00011...110... | 
 |   // RangeStart    :        ^ | 
 |   // RangeEnd      :              ^ | 
 |   // NegSrc        : 0...00000...001... | 
 |   if (*NumOperations + 1 >= N) { | 
 |     return false; | 
 |   } | 
 |   (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1); | 
 |   ++(*NumOperations); | 
 |   (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd); | 
 |   ++(*NumOperations); | 
 |   return true; | 
 | } | 
 |  | 
// tryToOptimize scans Src looking for sequences of 1s (including a single 1
// bit surrounded by zeroes).
 | template <std::size_t N> | 
 | bool tryToOptimize(uint32_t Src, SizeT *NumOperations, | 
 |                    std::array<AggregationElement, N> *Operations) { | 
 |   constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT; | 
 |   uint32_t NegSrc = ~Src; | 
 |  | 
 |   *NumOperations = 0; | 
 |   while (Src != 0 && *NumOperations < N) { | 
 |     // Each step of the algorithm: | 
 |     //   * finds L, the last bit set in Src; | 
 |     //   * clears all the upper bits in NegSrc up to bit L; | 
 |     //   * finds nL, the last bit set in NegSrc; | 
 |     //   * clears all the upper bits in Src up to bit nL; | 
 |     // | 
 |     // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence | 
 |     // of 1s starting at L, and ending at nL + 1, was found. | 
 |     const uint32_t SrcLastBitSet = llvm::findLastSet(Src); | 
 |     const uint32_t NegSrcClearMask = | 
 |         (SrcLastBitSet == 0) ? 0 | 
 |                              : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet); | 
 |     NegSrc &= NegSrcClearMask; | 
 |     if (NegSrc == 0) { | 
 |       if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) { | 
 |         return true; | 
 |       } | 
 |       return false; | 
 |     } | 
 |     const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc); | 
 |     assert(NegSrcLastBitSet < SrcLastBitSet); | 
 |     const uint32_t SrcClearMask = | 
 |         (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >> | 
 |                                           (SrcSizeBits - NegSrcLastBitSet); | 
 |     Src &= SrcClearMask; | 
 |     if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations, | 
 |                        Operations)) { | 
 |       return false; | 
 |     } | 
 |   } | 
 |  | 
 |   return Src == 0; | 
 | } | 
 | } // end of namespace StrengthReduction | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |  | 
 |   if (Dest->isRematerializable()) { | 
 |     Context.insert<InstFakeDef>(Dest); | 
 |     return; | 
 |   } | 
 |  | 
 |   Type DestTy = Dest->getType(); | 
 |   if (DestTy == IceType_i1) { | 
 |     lowerInt1Arithmetic(Instr); | 
 |     return; | 
 |   } | 
 |  | 
 |   Operand *Src0 = legalizeUndef(Instr->getSrc(0)); | 
 |   Operand *Src1 = legalizeUndef(Instr->getSrc(1)); | 
 |   if (DestTy == IceType_i64) { | 
 |     lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1); | 
 |     return; | 
 |   } | 
 |  | 
 |   if (isVectorType(DestTy)) { | 
 |     switch (Instr->getOp()) { | 
 |     default: | 
 |       UnimplementedLoweringError(this, Instr); | 
 |       return; | 
 |     // Explicitly whitelist vector instructions we have implemented/enabled. | 
 |     case InstArithmetic::Add: | 
 |     case InstArithmetic::And: | 
 |     case InstArithmetic::Ashr: | 
 |     case InstArithmetic::Fadd: | 
 |     case InstArithmetic::Fmul: | 
 |     case InstArithmetic::Fsub: | 
 |     case InstArithmetic::Lshr: | 
 |     case InstArithmetic::Mul: | 
 |     case InstArithmetic::Or: | 
 |     case InstArithmetic::Shl: | 
 |     case InstArithmetic::Sub: | 
 |     case InstArithmetic::Xor: | 
 |       break; | 
 |     } | 
 |   } | 
 |  | 
 |   Variable *T = makeReg(DestTy); | 
 |  | 
  // * Handle div/rem separately. They require a non-legalized Src1 so we can
  // inspect whether Src1 is a non-zero constant. Once legalized, that is more
  // difficult to determine (the constant may have been moved to a register).
  // * Handle floating point arithmetic separately: it requires Src1 to be
  // legalized to a register.
 |   switch (Instr->getOp()) { | 
 |   default: | 
 |     break; | 
 |   case InstArithmetic::Udiv: { | 
 |     constexpr bool NotRemainder = false; | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 
 |                  NotRemainder); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Sdiv: { | 
 |     constexpr bool NotRemainder = false; | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, | 
 |                  NotRemainder); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Urem: { | 
 |     constexpr bool IsRemainder = true; | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 
 |                  IsRemainder); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Srem: { | 
 |     constexpr bool IsRemainder = true; | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, | 
 |                  IsRemainder); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Frem: { | 
 |     if (!isScalarFloatingType(DestTy)) { | 
 |       llvm::report_fatal_error("Unexpected type when lowering frem."); | 
 |     } | 
 |     llvm::report_fatal_error("Frem should have already been lowered."); | 
 |   } | 
 |   case InstArithmetic::Fadd: { | 
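    // If Src1 comes from a multiply tracked by Computations, fold it into a
    // vmla (Src0R += Src1R * Src2R) instead of a separate vmul and vadd.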
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | 
 |       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | 
 |       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | 
 |       _vmla(Src0R, Src1R, Src2R); | 
 |       _mov(Dest, Src0R); | 
 |       return; | 
 |     } | 
 |  | 
 |     Variable *Src1R = legalizeToReg(Src1); | 
 |     _vadd(T, Src0R, Src1R); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Fsub: { | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | 
 |       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | 
 |       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | 
 |       _vmls(Src0R, Src1R, Src2R); | 
 |       _mov(Dest, Src0R); | 
 |       return; | 
 |     } | 
 |     Variable *Src1R = legalizeToReg(Src1); | 
 |     _vsub(T, Src0R, Src1R); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Fmul: { | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     Variable *Src1R = legalizeToReg(Src1); | 
 |     _vmul(T, Src0R, Src1R); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Fdiv: { | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     Variable *Src1R = legalizeToReg(Src1); | 
 |     _vdiv(T, Src0R, Src1R); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   } | 
 |  | 
 |   // Handle everything else here. | 
 |   Int32Operands Srcs(Src0, Src1); | 
 |   switch (Instr->getOp()) { | 
 |   case InstArithmetic::_num: | 
 |     llvm::report_fatal_error("Unknown arithmetic operator"); | 
 |     return; | 
 |   case InstArithmetic::Add: { | 
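    // If Src1 comes from an integer multiply tracked by Computations, fold it
    // into an mla: T = Src1R * Src2R + Src0R.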
 |     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | 
 |       assert(!isVectorType(DestTy)); | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | 
 |       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | 
 |       _mla(T, Src1R, Src2R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Srcs.hasConstOperand()) { | 
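      // a = b + C where C is not encodable as a flex immediate but -C is:
      // lower it as a subtraction by -C (rsb if the operands were swapped).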
 |       if (!Srcs.immediateIsFlexEncodable() && | 
 |           Srcs.negatedImmediateIsFlexEncodable()) { | 
 |         assert(!isVectorType(DestTy)); | 
 |         Variable *Src0R = Srcs.src0R(this); | 
 |         Operand *Src1F = Srcs.negatedSrc1F(this); | 
 |         if (!Srcs.swappedOperands()) { | 
 |           _sub(T, Src0R, Src1F); | 
 |         } else { | 
 |           _rsb(T, Src0R, Src1F); | 
 |         } | 
 |         _mov(Dest, T); | 
 |         return; | 
 |       } | 
 |     } | 
 |     Variable *Src0R = Srcs.src0R(this); | 
 |     if (isVectorType(DestTy)) { | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vadd(T, Src0R, Src1R); | 
 |     } else { | 
 |       Operand *Src1RF = Srcs.src1RF(this); | 
 |       _add(T, Src0R, Src1RF); | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::And: { | 
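    // a = b & C where C is not encodable as a flex immediate but ~C is: use
    // bic (bit clear) with the inverted immediate.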
 |     if (Srcs.hasConstOperand()) { | 
 |       if (!Srcs.immediateIsFlexEncodable() && | 
 |           Srcs.invertedImmediateIsFlexEncodable()) { | 
 |         Variable *Src0R = Srcs.src0R(this); | 
 |         Operand *Src1F = Srcs.invertedSrc1F(this); | 
 |         _bic(T, Src0R, Src1F); | 
 |         _mov(Dest, T); | 
 |         return; | 
 |       } | 
 |     } | 
 |     assert(isIntegerType(DestTy)); | 
 |     Variable *Src0R = Srcs.src0R(this); | 
 |     if (isVectorType(DestTy)) { | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vand(T, Src0R, Src1R); | 
 |     } else { | 
 |       Operand *Src1RF = Srcs.src1RF(this); | 
 |       _and(T, Src0R, Src1RF); | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Or: { | 
 |     Variable *Src0R = Srcs.src0R(this); | 
 |     assert(isIntegerType(DestTy)); | 
 |     if (isVectorType(DestTy)) { | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vorr(T, Src0R, Src1R); | 
 |     } else { | 
 |       Operand *Src1RF = Srcs.src1RF(this); | 
 |       _orr(T, Src0R, Src1RF); | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Xor: { | 
 |     Variable *Src0R = Srcs.src0R(this); | 
 |     assert(isIntegerType(DestTy)); | 
 |     if (isVectorType(DestTy)) { | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _veor(T, Src0R, Src1R); | 
 |     } else { | 
 |       Operand *Src1RF = Srcs.src1RF(this); | 
 |       _eor(T, Src0R, Src1RF); | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Sub: { | 
 |     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | 
 |       assert(!isVectorType(DestTy)); | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | 
 |       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | 
 |       _mls(T, Src1R, Src2R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Srcs.hasConstOperand()) { | 
 |       assert(!isVectorType(DestTy)); | 
 |       if (Srcs.immediateIsFlexEncodable()) { | 
 |         Variable *Src0R = Srcs.src0R(this); | 
 |         Operand *Src1RF = Srcs.src1RF(this); | 
 |         if (Srcs.swappedOperands()) { | 
 |           _rsb(T, Src0R, Src1RF); | 
 |         } else { | 
 |           _sub(T, Src0R, Src1RF); | 
 |         } | 
 |         _mov(Dest, T); | 
 |         return; | 
 |       } | 
 |       if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { | 
 |         Variable *Src0R = Srcs.src0R(this); | 
 |         Operand *Src1F = Srcs.negatedSrc1F(this); | 
 |         _add(T, Src0R, Src1F); | 
 |         _mov(Dest, T); | 
 |         return; | 
 |       } | 
 |     } | 
 |     Variable *Src0R = Srcs.unswappedSrc0R(this); | 
 |     Variable *Src1R = Srcs.unswappedSrc1R(this); | 
 |     if (isVectorType(DestTy)) { | 
 |       _vsub(T, Src0R, Src1R); | 
 |     } else { | 
 |       _sub(T, Src0R, Src1R); | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Mul: { | 
 |     const bool OptM1 = Func->getOptLevel() == Opt_m1; | 
 |     if (!OptM1 && Srcs.hasConstOperand()) { | 
 |       constexpr std::size_t MaxShifts = 4; | 
 |       std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts; | 
 |       SizeT NumOperations; | 
 |       int32_t Const = Srcs.getConstantValue(); | 
 |       const bool Invert = Const < 0; | 
 |       const bool MultiplyByZero = Const == 0; | 
 |       Operand *_0 = | 
 |           legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex); | 
 |  | 
 |       if (MultiplyByZero) { | 
 |         _mov(T, _0); | 
 |         _mov(Dest, T); | 
 |         return; | 
 |       } | 
 |  | 
 |       if (Invert) { | 
 |         Const = -Const; | 
 |       } | 
 |  | 
 |       if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) { | 
 |         assert(NumOperations >= 1); | 
 |         Variable *Src0R = Srcs.src0R(this); | 
 |         int32_t Start; | 
 |         int32_t End; | 
 |         if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) { | 
          // Multiplication by a power of 2 (NumOperations == 1); or
          // multiplication by an even number that is not a power of 2.
 |           Start = 1; | 
 |           End = NumOperations; | 
 |           assert(Shifts[0].aggregateWithAdd()); | 
 |           _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt())); | 
 |         } else { | 
          // Multiplication by an odd number. Put the free barrel shifter to
          // good use.
 |           Start = 0; | 
 |           End = NumOperations - 2; | 
 |           const StrengthReduction::AggregationElement &Last = | 
 |               Shifts[NumOperations - 1]; | 
 |           const StrengthReduction::AggregationElement &SecondToLast = | 
 |               Shifts[NumOperations - 2]; | 
 |           if (!Last.aggregateWithAdd()) { | 
 |             assert(SecondToLast.aggregateWithAdd()); | 
 |             _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); | 
 |           } else if (!SecondToLast.aggregateWithAdd()) { | 
 |             assert(Last.aggregateWithAdd()); | 
 |             _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); | 
 |           } else { | 
 |             _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); | 
 |           } | 
 |         } | 
 |  | 
 |         // Odd numbers :   S                                 E   I   I | 
 |         //               +---+---+---+---+---+---+ ... +---+---+---+---+ | 
 |         //     Shifts  = |   |   |   |   |   |   | ... |   |   |   |   | | 
 |         //               +---+---+---+---+---+---+ ... +---+---+---+---+ | 
 |         // Even numbers:   I   S                                     E | 
 |         // | 
 |         // S: Start; E: End; I: Init | 
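        //
        // For example, multiplying by 7 == 0b111 gives Shifts ==
        // [(Add, 3), (Sub, 0)]; the last shift amount is 0, so the
        // odd-number path above computes T = (Src0R << 3) - Src0R via rsb,
        // and the loop below has nothing left to do (Start == End == 0).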
 |         for (int32_t I = Start; I < End; ++I) { | 
 |           const StrengthReduction::AggregationElement &Current = Shifts[I]; | 
 |           Operand *SrcF = Current.createShiftedOperand(Func, Src0R); | 
 |           if (Current.aggregateWithAdd()) { | 
 |             _add(T, T, SrcF); | 
 |           } else { | 
 |             _sub(T, T, SrcF); | 
 |           } | 
 |         } | 
 |  | 
 |         if (Invert) { | 
 |           // T = 0 - T. | 
 |           _rsb(T, T, _0); | 
 |         } | 
 |  | 
 |         _mov(Dest, T); | 
 |         return; | 
 |       } | 
 |     } | 
 |     Variable *Src0R = Srcs.unswappedSrc0R(this); | 
 |     Variable *Src1R = Srcs.unswappedSrc1R(this); | 
 |     if (isVectorType(DestTy)) { | 
 |       _vmul(T, Src0R, Src1R); | 
 |     } else { | 
 |       _mul(T, Src0R, Src1R); | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Shl: { | 
 |     Variable *Src0R = Srcs.unswappedSrc0R(this); | 
 |     if (!isVectorType(T->getType())) { | 
 |       if (Srcs.isSrc1ImmediateZero()) { | 
 |         _mov(T, Src0R); | 
 |       } else { | 
 |         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); | 
 |         _lsl(T, Src0R, Src1R); | 
 |       } | 
 |     } else { | 
 |       if (Srcs.hasConstOperand()) { | 
 |         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1()); | 
 |         _vshl(T, Src0R, ShAmt); | 
 |       } else { | 
 |         auto *Src1R = Srcs.unswappedSrc1R(this); | 
 |         _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned); | 
 |       } | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Lshr: { | 
 |     Variable *Src0R = Srcs.unswappedSrc0R(this); | 
 |     if (!isVectorType(T->getType())) { | 
 |       if (DestTy != IceType_i32) { | 
 |         _uxt(Src0R, Src0R); | 
 |       } | 
 |       if (Srcs.isSrc1ImmediateZero()) { | 
 |         _mov(T, Src0R); | 
 |       } else { | 
 |         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); | 
 |         _lsr(T, Src0R, Src1R); | 
 |       } | 
 |     } else { | 
 |       if (Srcs.hasConstOperand()) { | 
 |         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1()); | 
 |         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned); | 
 |       } else { | 
 |         auto *Src1R = Srcs.unswappedSrc1R(this); | 
 |         auto *Src1RNeg = makeReg(Src1R->getType()); | 
 |         _vneg(Src1RNeg, Src1R); | 
 |         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned); | 
 |       } | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Ashr: { | 
 |     Variable *Src0R = Srcs.unswappedSrc0R(this); | 
 |     if (!isVectorType(T->getType())) { | 
 |       if (DestTy != IceType_i32) { | 
 |         _sxt(Src0R, Src0R); | 
 |       } | 
 |       if (Srcs.isSrc1ImmediateZero()) { | 
 |         _mov(T, Src0R); | 
 |       } else { | 
 |         _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this)); | 
 |       } | 
 |     } else { | 
 |       if (Srcs.hasConstOperand()) { | 
 |         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1()); | 
 |         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed); | 
 |       } else { | 
 |         auto *Src1R = Srcs.unswappedSrc1R(this); | 
 |         auto *Src1RNeg = makeReg(Src1R->getType()); | 
 |         _vneg(Src1RNeg, Src1R); | 
 |         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed); | 
 |       } | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case InstArithmetic::Udiv: | 
 |   case InstArithmetic::Sdiv: | 
 |   case InstArithmetic::Urem: | 
 |   case InstArithmetic::Srem: | 
 |     llvm::report_fatal_error( | 
 |         "Integer div/rem should have been handled earlier."); | 
 |     return; | 
 |   case InstArithmetic::Fadd: | 
 |   case InstArithmetic::Fsub: | 
 |   case InstArithmetic::Fmul: | 
 |   case InstArithmetic::Fdiv: | 
 |   case InstArithmetic::Frem: | 
 |     llvm::report_fatal_error( | 
 |         "Floating point arith should have been handled earlier."); | 
 |     return; | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerAssign(const InstAssign *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |  | 
 |   if (Dest->isRematerializable()) { | 
 |     Context.insert<InstFakeDef>(Dest); | 
 |     return; | 
 |   } | 
 |  | 
 |   Operand *Src0 = Instr->getSrc(0); | 
 |   assert(Dest->getType() == Src0->getType()); | 
 |   if (Dest->getType() == IceType_i64) { | 
 |     Src0 = legalizeUndef(Src0); | 
 |  | 
 |     Variable *T_Lo = makeReg(IceType_i32); | 
 |     auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
 |     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 
 |     _mov(T_Lo, Src0Lo); | 
 |     _mov(DestLo, T_Lo); | 
 |  | 
 |     Variable *T_Hi = makeReg(IceType_i32); | 
 |     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
 |     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 
 |     _mov(T_Hi, Src0Hi); | 
 |     _mov(DestHi, T_Hi); | 
 |  | 
 |     return; | 
 |   } | 
 |  | 
 |   Operand *NewSrc; | 
 |   if (Dest->hasReg()) { | 
 |     // If Dest already has a physical register, then legalize the Src operand | 
 |     // into a Variable with the same register assignment. This especially | 
 |     // helps allow the use of Flex operands. | 
 |     NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | 
 |   } else { | 
 |     // Dest could be a stack operand. Since we could potentially need to do a | 
 |     // Store (and store can only have Register operands), legalize this to a | 
 |     // register. | 
 |     NewSrc = legalize(Src0, Legal_Reg); | 
 |   } | 
 |  | 
 |   if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { | 
 |     NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); | 
 |   } | 
 |   _mov(Dest, NewSrc); | 
 | } | 
 |  | 
 | TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 
 |     Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 
 |     const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 
 |   InstARM32Label *NewShortCircuitLabel = nullptr; | 
 |   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 
 |  | 
 |   const Inst *Producer = Computations.getProducerOf(Boolean); | 
 |  | 
 |   if (Producer == nullptr) { | 
    // No producer, no problem: just emit code to perform (Boolean & 1) and set
    // the flags register. The branch should be taken if the resulting flags
    // indicate a non-zero result.
 |     _tst(legalizeToReg(Boolean), _1); | 
 |     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); | 
 |   } | 
 |  | 
 |   switch (Producer->getKind()) { | 
 |   default: | 
 |     llvm::report_fatal_error("Unexpected producer."); | 
 |   case Inst::Icmp: { | 
 |     return ShortCircuitCondAndLabel( | 
 |         lowerIcmpCond(llvm::cast<InstIcmp>(Producer))); | 
 |   } break; | 
 |   case Inst::Fcmp: { | 
 |     return ShortCircuitCondAndLabel( | 
 |         lowerFcmpCond(llvm::cast<InstFcmp>(Producer))); | 
 |   } break; | 
 |   case Inst::Cast: { | 
 |     const auto *CastProducer = llvm::cast<InstCast>(Producer); | 
 |     assert(CastProducer->getCastKind() == InstCast::Trunc); | 
 |     Operand *Src = CastProducer->getSrc(0); | 
 |     if (Src->getType() == IceType_i64) | 
 |       Src = loOperand(Src); | 
 |     _tst(legalizeToReg(Src), _1); | 
 |     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); | 
 |   } break; | 
 |   case Inst::Arithmetic: { | 
 |     const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer); | 
 |     switch (ArithProducer->getOp()) { | 
 |     default: | 
 |       llvm::report_fatal_error("Unhandled Arithmetic Producer."); | 
 |     case InstArithmetic::And: { | 
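      // Short-circuit A && B: branch to the false (short-circuit) target as
      // soon as A is known to be false; only fall through to evaluate B when
      // A is true.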
 |       if (!(ShortCircuitable & SC_And)) { | 
 |         NewShortCircuitLabel = InstARM32Label::create(Func, this); | 
 |       } | 
 |  | 
 |       LowerInt1BranchTarget NewTarget = | 
 |           TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel); | 
 |  | 
 |       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch( | 
 |           Producer->getSrc(0), TargetTrue, NewTarget, SC_And); | 
 |       const CondWhenTrue &Cond = CondAndLabel.Cond; | 
 |  | 
 |       _br_short_circuit(NewTarget, Cond.invert()); | 
 |  | 
 |       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget; | 
 |       if (ShortCircuitLabel != nullptr) | 
 |         Context.insert(ShortCircuitLabel); | 
 |  | 
 |       return ShortCircuitCondAndLabel( | 
 |           lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All) | 
 |               .assertNoLabelAndReturnCond(), | 
 |           NewShortCircuitLabel); | 
 |     } break; | 
 |     case InstArithmetic::Or: { | 
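      // Short-circuit A || B: branch to the true (short-circuit) target as
      // soon as A is known to be true; only fall through to evaluate B when
      // A is false.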
 |       if (!(ShortCircuitable & SC_Or)) { | 
 |         NewShortCircuitLabel = InstARM32Label::create(Func, this); | 
 |       } | 
 |  | 
 |       LowerInt1BranchTarget NewTarget = | 
 |           TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel); | 
 |  | 
 |       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch( | 
 |           Producer->getSrc(0), NewTarget, TargetFalse, SC_Or); | 
 |       const CondWhenTrue &Cond = CondAndLabel.Cond; | 
 |  | 
 |       _br_short_circuit(NewTarget, Cond); | 
 |  | 
 |       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget; | 
 |       if (ShortCircuitLabel != nullptr) | 
 |         Context.insert(ShortCircuitLabel); | 
 |  | 
 |       return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1), | 
 |                                                          NewTarget, TargetFalse, | 
 |                                                          SC_All) | 
 |                                           .assertNoLabelAndReturnCond(), | 
 |                                       NewShortCircuitLabel); | 
 |     } break; | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerBr(const InstBr *Instr) { | 
 |   if (Instr->isUnconditional()) { | 
 |     _br(Instr->getTargetUnconditional()); | 
 |     return; | 
 |   } | 
 |  | 
 |   CfgNode *TargetTrue = Instr->getTargetTrue(); | 
 |   CfgNode *TargetFalse = Instr->getTargetFalse(); | 
 |   ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch( | 
 |       Instr->getCondition(), LowerInt1BranchTarget(TargetTrue), | 
 |       LowerInt1BranchTarget(TargetFalse), SC_All); | 
 |   assert(CondAndLabel.ShortCircuitTarget == nullptr); | 
 |  | 
 |   const CondWhenTrue &Cond = CondAndLabel.Cond; | 
 |   if (Cond.WhenTrue1 != CondARM32::kNone) { | 
 |     assert(Cond.WhenTrue0 != CondARM32::AL); | 
 |     _br(TargetTrue, Cond.WhenTrue1); | 
 |   } | 
 |  | 
 |   switch (Cond.WhenTrue0) { | 
 |   default: | 
 |     _br(TargetTrue, TargetFalse, Cond.WhenTrue0); | 
 |     break; | 
 |   case CondARM32::kNone: | 
 |     _br(TargetFalse); | 
 |     break; | 
 |   case CondARM32::AL: | 
 |     _br(TargetTrue); | 
 |     break; | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerCall(const InstCall *Instr) { | 
 |   Operand *CallTarget = Instr->getCallTarget(); | 
 |   if (Instr->isTargetHelperCall()) { | 
 |     auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget); | 
 |     if (TargetHelperPreamble != ARM32HelpersPreamble.end()) { | 
 |       (this->*TargetHelperPreamble->second)(Instr); | 
 |     } | 
 |   } | 
 |   MaybeLeafFunc = false; | 
 |   NeedsStackAlignment = true; | 
 |  | 
 |   // Assign arguments to registers and stack. Also reserve stack. | 
 |   TargetARM32::CallingConv CC; | 
 |   // Pair of Arg Operand -> GPR number assignments. | 
 |   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs; | 
 |   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs; | 
 |   // Pair of Arg Operand -> stack offset. | 
 |   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs; | 
 |   size_t ParameterAreaSizeBytes = 0; | 
 |  | 
 |   // Classify each argument operand according to the location where the | 
 |   // argument is passed. | 
 |   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 
 |     Operand *Arg = legalizeUndef(Instr->getArg(i)); | 
 |     const Type Ty = Arg->getType(); | 
 |     bool InReg = false; | 
 |     RegNumT Reg; | 
 |     if (isScalarIntegerType(Ty)) { | 
 |       InReg = CC.argInGPR(Ty, &Reg); | 
 |     } else { | 
 |       InReg = CC.argInVFP(Ty, &Reg); | 
 |     } | 
 |  | 
 |     if (!InReg) { | 
 |       ParameterAreaSizeBytes = | 
 |           applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); | 
 |       StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); | 
 |       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); | 
 |       continue; | 
 |     } | 
 |  | 
 |     if (Ty == IceType_i64) { | 
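      // An i64 argument assigned to registers is split across a GPR pair;
      // record the lo and hi halves with their respective register numbers.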
 |       Operand *Lo = loOperand(Arg); | 
 |       Operand *Hi = hiOperand(Arg); | 
 |       GPRArgs.push_back(std::make_pair( | 
 |           Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg)))); | 
 |       GPRArgs.push_back(std::make_pair( | 
 |           Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg)))); | 
 |     } else if (isScalarIntegerType(Ty)) { | 
 |       GPRArgs.push_back(std::make_pair(Arg, Reg)); | 
 |     } else { | 
 |       FPArgs.push_back(std::make_pair(Arg, Reg)); | 
 |     } | 
 |   } | 
 |  | 
 |   // Adjust the parameter area so that the stack is aligned. It is assumed that | 
 |   // the stack is already aligned at the start of the calling sequence. | 
 |   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 
 |  | 
 |   if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) { | 
 |     llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max."); | 
 |   } | 
 |  | 
 |   // Copy arguments that are passed on the stack to the appropriate stack | 
 |   // locations. | 
 |   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 
 |   for (auto &StackArg : StackArgs) { | 
 |     ConstantInteger32 *Loc = | 
 |         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); | 
 |     Type Ty = StackArg.first->getType(); | 
 |     OperandARM32Mem *Addr; | 
 |     constexpr bool SignExt = false; | 
 |     if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { | 
 |       Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); | 
 |     } else { | 
 |       Variable *NewBase = Func->makeVariable(SP->getType()); | 
 |       lowerArithmetic( | 
 |           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); | 
 |       Addr = formMemoryOperand(NewBase, Ty); | 
 |     } | 
 |     lowerStore(InstStore::create(Func, StackArg.first, Addr)); | 
 |   } | 
 |  | 
 |   // Generate the call instruction. Assign its result to a temporary with high | 
 |   // register allocation weight. | 
 |   Variable *Dest = Instr->getDest(); | 
 |   // ReturnReg doubles as ReturnRegLo as necessary. | 
 |   Variable *ReturnReg = nullptr; | 
 |   Variable *ReturnRegHi = nullptr; | 
 |   if (Dest) { | 
 |     switch (Dest->getType()) { | 
 |     case IceType_NUM: | 
 |       llvm::report_fatal_error("Invalid Call dest type"); | 
 |       break; | 
 |     case IceType_void: | 
 |       break; | 
 |     case IceType_i1: | 
 |       assert(Computations.getProducerOf(Dest) == nullptr); | 
 |     // Fall-through intended. | 
 |     case IceType_i8: | 
 |     case IceType_i16: | 
 |     case IceType_i32: | 
 |       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); | 
 |       break; | 
 |     case IceType_i64: | 
 |       ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); | 
 |       ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); | 
 |       break; | 
 |     case IceType_f32: | 
 |       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0); | 
 |       break; | 
 |     case IceType_f64: | 
 |       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0); | 
 |       break; | 
 |     case IceType_v4i1: | 
 |     case IceType_v8i1: | 
 |     case IceType_v16i1: | 
 |     case IceType_v16i8: | 
 |     case IceType_v8i16: | 
 |     case IceType_v4i32: | 
 |     case IceType_v4f32: | 
 |       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); | 
 |       break; | 
 |     } | 
 |   } | 
 |  | 
 |   // Allow ConstantRelocatable to be left alone as a direct call, but force | 
 |   // other constants like ConstantInteger32 to be in a register and make it an | 
 |   // indirect call. | 
 |   if (!llvm::isa<ConstantRelocatable>(CallTarget)) { | 
 |     CallTarget = legalize(CallTarget, Legal_Reg); | 
 |   } | 
 |  | 
 |   // Copy arguments to be passed in registers to the appropriate registers. | 
 |   CfgVector<Variable *> RegArgs; | 
 |   for (auto &FPArg : FPArgs) { | 
 |     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); | 
 |   } | 
 |   for (auto &GPRArg : GPRArgs) { | 
 |     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second)); | 
 |   } | 
 |  | 
 |   // Generate a FakeUse of register arguments so that they do not get dead code | 
 |   // eliminated as a result of the FakeKill of scratch registers after the call. | 
  // These fake-uses need to be placed here to keep the argument registers
  // from being used during the legalizeToReg() calls above.
 |   for (auto *RegArg : RegArgs) { | 
 |     Context.insert<InstFakeUse>(RegArg); | 
 |   } | 
 |  | 
 |   InstARM32Call *NewCall = | 
 |       Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); | 
 |  | 
 |   if (ReturnRegHi) | 
 |     Context.insert<InstFakeDef>(ReturnRegHi); | 
 |  | 
 |   // Insert a register-kill pseudo instruction. | 
 |   Context.insert<InstFakeKill>(NewCall); | 
 |  | 
 |   // Generate a FakeUse to keep the call live if necessary. | 
 |   if (Instr->hasSideEffects() && ReturnReg) { | 
 |     Context.insert<InstFakeUse>(ReturnReg); | 
 |   } | 
 |  | 
 |   if (Dest != nullptr) { | 
 |     // Assign the result of the call to Dest. | 
 |     if (ReturnReg != nullptr) { | 
 |       if (ReturnRegHi) { | 
 |         auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 
 |         Variable *DestLo = Dest64On32->getLo(); | 
 |         Variable *DestHi = Dest64On32->getHi(); | 
 |         _mov(DestLo, ReturnReg); | 
 |         _mov(DestHi, ReturnRegHi); | 
 |       } else { | 
 |         if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { | 
 |           _mov(Dest, ReturnReg); | 
 |         } else { | 
 |           assert(isIntegerType(Dest->getType()) && | 
 |                  typeWidthInBytes(Dest->getType()) <= 4); | 
 |           _mov(Dest, ReturnReg); | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   if (Instr->isTargetHelperCall()) { | 
 |     auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget); | 
 |     if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) { | 
 |       (this->*TargetHelpersPostamble->second)(Instr); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | namespace { | 
 | void configureBitcastTemporary(Variable64On32 *Var) { | 
 |   Var->setMustNotHaveReg(); | 
 |   Var->getHi()->setMustHaveReg(); | 
 |   Var->getLo()->setMustHaveReg(); | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::lowerCast(const InstCast *Instr) { | 
 |   InstCast::OpKind CastKind = Instr->getCastKind(); | 
 |   Variable *Dest = Instr->getDest(); | 
 |   const Type DestTy = Dest->getType(); | 
 |   Operand *Src0 = legalizeUndef(Instr->getSrc(0)); | 
 |   switch (CastKind) { | 
 |   default: | 
 |     Func->setError("Cast type not supported"); | 
 |     return; | 
 |   case InstCast::Sext: { | 
 |     if (isVectorType(DestTy)) { | 
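      // Vector sext from an i1 vector: shift each element's LSB into the
      // sign position, then arithmetic-shift it back so the bit is
      // replicated across the whole element.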
 |       Variable *T0 = makeReg(DestTy); | 
 |       Variable *T1 = makeReg(DestTy); | 
 |       ConstantInteger32 *ShAmt = nullptr; | 
 |       switch (DestTy) { | 
 |       default: | 
 |         llvm::report_fatal_error("Unexpected type in vector sext."); | 
 |       case IceType_v16i8: | 
 |         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7)); | 
 |         break; | 
 |       case IceType_v8i16: | 
 |         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15)); | 
 |         break; | 
 |       case IceType_v4i32: | 
 |         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31)); | 
 |         break; | 
 |       } | 
 |       auto *Src0R = legalizeToReg(Src0); | 
 |       _vshl(T0, Src0R, ShAmt); | 
 |       _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed); | 
 |       _mov(Dest, T1); | 
 |     } else if (DestTy == IceType_i64) { | 
 |       // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 | 
 |       Constant *ShiftAmt = Ctx->getConstantInt32(31); | 
 |       auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
 |       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
 |       Variable *T_Lo = makeReg(DestLo->getType()); | 
 |       if (Src0->getType() == IceType_i32) { | 
 |         Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 
 |         _mov(T_Lo, Src0RF); | 
 |       } else if (Src0->getType() != IceType_i1) { | 
 |         Variable *Src0R = legalizeToReg(Src0); | 
 |         _sxt(T_Lo, Src0R); | 
 |       } else { | 
 |         Operand *_0 = Ctx->getConstantZero(IceType_i32); | 
 |         Operand *_m1 = Ctx->getConstantInt32(-1); | 
 |         lowerInt1ForSelect(T_Lo, Src0, _m1, _0); | 
 |       } | 
 |       _mov(DestLo, T_Lo); | 
 |       Variable *T_Hi = makeReg(DestHi->getType()); | 
 |       if (Src0->getType() != IceType_i1) { | 
 |         _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, | 
 |                                                OperandARM32::ASR, ShiftAmt)); | 
 |       } else { | 
        // For i1, T_Lo already holds the fully sign-extended value (0 or -1),
        // so just copy it into the high word.
 |         _mov(T_Hi, T_Lo); | 
 |       } | 
 |       _mov(DestHi, T_Hi); | 
 |     } else if (Src0->getType() != IceType_i1) { | 
 |       // t1 = sxt src; dst = t1 | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *T = makeReg(DestTy); | 
 |       _sxt(T, Src0R); | 
 |       _mov(Dest, T); | 
 |     } else { | 
 |       Constant *_0 = Ctx->getConstantZero(IceType_i32); | 
 |       Operand *_m1 = Ctx->getConstantInt(DestTy, -1); | 
 |       Variable *T = makeReg(DestTy); | 
 |       lowerInt1ForSelect(T, Src0, _m1, _0); | 
 |       _mov(Dest, T); | 
 |     } | 
 |     break; | 
 |   } | 
 |   case InstCast::Zext: { | 
 |     if (isVectorType(DestTy)) { | 
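      // Vector zext from an i1 vector: mask each element down to its LSB.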
 |       auto *Mask = makeReg(DestTy); | 
 |       auto *_1 = Ctx->getConstantInt32(1); | 
 |       auto *T = makeReg(DestTy); | 
 |       auto *Src0R = legalizeToReg(Src0); | 
 |       _mov(Mask, _1); | 
 |       _vand(T, Src0R, Mask); | 
 |       _mov(Dest, T); | 
 |     } else if (DestTy == IceType_i64) { | 
 |       // t1=uxtb src; dst.lo=t1; dst.hi=0 | 
 |       Operand *_0 = | 
 |           legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |       auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
 |       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
 |       Variable *T_Lo = makeReg(DestLo->getType()); | 
 |  | 
 |       switch (Src0->getType()) { | 
 |       default: { | 
 |         assert(Src0->getType() != IceType_i64); | 
 |         _uxt(T_Lo, legalizeToReg(Src0)); | 
 |       } break; | 
 |       case IceType_i32: { | 
 |         _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex)); | 
 |       } break; | 
 |       case IceType_i1: { | 
 |         SafeBoolChain Safe = lowerInt1(T_Lo, Src0); | 
 |         if (Safe == SBC_No) { | 
 |           Operand *_1 = | 
 |               legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 
 |           _and(T_Lo, T_Lo, _1); | 
 |         } | 
 |       } break; | 
 |       } | 
 |  | 
 |       _mov(DestLo, T_Lo); | 
 |  | 
 |       Variable *T_Hi = makeReg(DestLo->getType()); | 
 |       _mov(T_Hi, _0); | 
 |       _mov(DestHi, T_Hi); | 
 |     } else if (Src0->getType() == IceType_i1) { | 
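      // lowerInt1() reports whether the boolean chain already guarantees a
      // 0/1 value; if not (SBC_No), mask the result down to bit 0.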
 |       Variable *T = makeReg(DestTy); | 
 |  | 
 |       SafeBoolChain Safe = lowerInt1(T, Src0); | 
 |       if (Safe == SBC_No) { | 
 |         Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 
 |         _and(T, T, _1); | 
 |       } | 
 |  | 
 |       _mov(Dest, T); | 
 |     } else { | 
 |       // t1 = uxt src; dst = t1 | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *T = makeReg(DestTy); | 
 |       _uxt(T, Src0R); | 
 |       _mov(Dest, T); | 
 |     } | 
 |     break; | 
 |   } | 
 |   case InstCast::Trunc: { | 
 |     if (isVectorType(DestTy)) { | 
 |       auto *T = makeReg(DestTy); | 
 |       auto *Src0R = legalizeToReg(Src0); | 
 |       _mov(T, Src0R); | 
 |       _mov(Dest, T); | 
 |     } else { | 
 |       if (Src0->getType() == IceType_i64) | 
 |         Src0 = loOperand(Src0); | 
 |       Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 
 |       // t1 = trunc Src0RF; Dest = t1 | 
 |       Variable *T = makeReg(DestTy); | 
 |       _mov(T, Src0RF); | 
 |       if (DestTy == IceType_i1) | 
 |         _and(T, T, Ctx->getConstantInt1(1)); | 
 |       _mov(Dest, T); | 
 |     } | 
 |     break; | 
 |   } | 
 |   case InstCast::Fptrunc: | 
 |   case InstCast::Fpext: { | 
    // fptrunc: dest.f32 = fptrunc src0.fp64
    // fpext: dest.f64 = fpext src0.fp32
 |     const bool IsTrunc = CastKind == InstCast::Fptrunc; | 
 |     assert(!isVectorType(DestTy)); | 
 |     assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64)); | 
 |     assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32)); | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d); | 
 |     _mov(Dest, T); | 
 |     break; | 
 |   } | 
 |   case InstCast::Fptosi: | 
 |   case InstCast::Fptoui: { | 
 |     const bool DestIsSigned = CastKind == InstCast::Fptosi; | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |  | 
 |     if (isVectorType(DestTy)) { | 
 |       assert(typeElementType(Src0->getType()) == IceType_f32); | 
 |       auto *T = makeReg(DestTy); | 
 |       _vcvt(T, Src0R, | 
 |             DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui); | 
 |       _mov(Dest, T); | 
 |       break; | 
 |     } | 
 |  | 
 |     const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType()); | 
 |     if (llvm::isa<Variable64On32>(Dest)) { | 
 |       llvm::report_fatal_error("fp-to-i64 should have been pre-lowered."); | 
 |     } | 
 |     // fptosi: | 
 |     //     t1.fp = vcvt src0.fp | 
 |     //     t2.i32 = vmov t1.fp | 
 |     //     dest.int = conv t2.i32     @ Truncates the result if needed. | 
 |     // fptoui: | 
 |     //     t1.fp = vcvt src0.fp | 
 |     //     t2.u32 = vmov t1.fp | 
 |     //     dest.uint = conv t2.u32    @ Truncates the result if needed. | 
 |     Variable *T_fp = makeReg(IceType_f32); | 
 |     const InstARM32Vcvt::VcvtVariant Conversion = | 
 |         Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui) | 
 |                   : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui); | 
 |     _vcvt(T_fp, Src0R, Conversion); | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _mov(T, T_fp); | 
 |     if (DestTy != IceType_i32) { | 
 |       Variable *T_1 = makeReg(DestTy); | 
 |       lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T)); | 
 |       T = T_1; | 
 |     } | 
 |     _mov(Dest, T); | 
 |     break; | 
 |   } | 
 |   case InstCast::Sitofp: | 
 |   case InstCast::Uitofp: { | 
 |     const bool SourceIsSigned = CastKind == InstCast::Sitofp; | 
 |  | 
 |     if (isVectorType(DestTy)) { | 
 |       assert(typeElementType(DestTy) == IceType_f32); | 
 |       auto *T = makeReg(DestTy); | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _vcvt(T, Src0R, | 
 |             SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s); | 
 |       _mov(Dest, T); | 
 |       break; | 
 |     } | 
 |  | 
 |     const bool DestIsF32 = isFloat32Asserting32Or64(DestTy); | 
 |     if (Src0->getType() == IceType_i64) { | 
 |       llvm::report_fatal_error("i64-to-fp should have been pre-lowered."); | 
 |     } | 
 |     // sitofp: | 
 |     //     t1.i32 = sext src.int    @ sign-extends src0 if needed. | 
 |     //     t2.fp32 = vmov t1.i32 | 
 |     //     t3.fp = vcvt.{fp}.s32    @ fp is either f32 or f64 | 
 |     // uitofp: | 
 |     //     t1.i32 = zext src.int    @ zero-extends src0 if needed. | 
 |     //     t2.fp32 = vmov t1.i32 | 
    //     t3.fp = vcvt.{fp}.u32    @ fp is either f32 or f64
 |     if (Src0->getType() != IceType_i32) { | 
 |       Variable *Src0R_32 = makeReg(IceType_i32); | 
 |       lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext | 
 |                                                       : InstCast::Zext, | 
 |                                  Src0R_32, Src0)); | 
 |       Src0 = Src0R_32; | 
 |     } | 
 |     Variable *Src0R = legalizeToReg(Src0); | 
 |     Variable *Src0R_f32 = makeReg(IceType_f32); | 
 |     _mov(Src0R_f32, Src0R); | 
 |     Src0R = Src0R_f32; | 
 |     Variable *T = makeReg(DestTy); | 
 |     const InstARM32Vcvt::VcvtVariant Conversion = | 
 |         DestIsF32 | 
 |             ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s) | 
 |             : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d); | 
 |     _vcvt(T, Src0R, Conversion); | 
 |     _mov(Dest, T); | 
 |     break; | 
 |   } | 
 |   case InstCast::Bitcast: { | 
 |     Operand *Src0 = Instr->getSrc(0); | 
 |     if (DestTy == Src0->getType()) { | 
 |       auto *Assign = InstAssign::create(Func, Dest, Src0); | 
 |       lowerAssign(Assign); | 
 |       return; | 
 |     } | 
 |     switch (DestTy) { | 
 |     case IceType_NUM: | 
 |     case IceType_void: | 
 |       llvm::report_fatal_error("Unexpected bitcast."); | 
 |     case IceType_i1: | 
 |       UnimplementedLoweringError(this, Instr); | 
 |       break; | 
 |     case IceType_i8: | 
 |       assert(Src0->getType() == IceType_v8i1); | 
 |       llvm::report_fatal_error( | 
 |           "i8 to v8i1 conversion should have been prelowered."); | 
 |       break; | 
 |     case IceType_i16: | 
 |       assert(Src0->getType() == IceType_v16i1); | 
 |       llvm::report_fatal_error( | 
 |           "i16 to v16i1 conversion should have been prelowered."); | 
 |       break; | 
 |     case IceType_i32: | 
 |     case IceType_f32: { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *T = makeReg(DestTy); | 
 |       _mov(T, Src0R); | 
 |       lowerAssign(InstAssign::create(Func, Dest, T)); | 
 |       break; | 
 |     } | 
 |     case IceType_i64: { | 
 |       // t0, t1 <- src0 | 
 |       // dest[31..0]  = t0 | 
 |       // dest[63..32] = t1 | 
 |       assert(Src0->getType() == IceType_f64); | 
 |       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 
 |       T->initHiLo(Func); | 
 |       configureBitcastTemporary(T); | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _mov(T, Src0R); | 
 |       Context.insert<InstFakeUse>(T->getHi()); | 
 |       Context.insert<InstFakeUse>(T->getLo()); | 
 |       lowerAssign(InstAssign::create(Func, Dest, T)); | 
 |       break; | 
 |     } | 
 |     case IceType_f64: { | 
 |       // T0 <- lo(src) | 
 |       // T1 <- hi(src) | 
 |       // vmov T2, T0, T1 | 
 |       // Dest <- T2 | 
 |       assert(Src0->getType() == IceType_i64); | 
 |       Variable *T = makeReg(DestTy); | 
 |       auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 
 |       Src64->initHiLo(Func); | 
 |       configureBitcastTemporary(Src64); | 
 |       lowerAssign(InstAssign::create(Func, Src64, Src0)); | 
 |       _mov(T, Src64); | 
 |       lowerAssign(InstAssign::create(Func, Dest, T)); | 
 |       break; | 
 |     } | 
 |     case IceType_v8i1: | 
 |       assert(Src0->getType() == IceType_i8); | 
 |       llvm::report_fatal_error( | 
 |           "v8i1 to i8 conversion should have been prelowered."); | 
 |       break; | 
 |     case IceType_v16i1: | 
 |       assert(Src0->getType() == IceType_i16); | 
 |       llvm::report_fatal_error( | 
 |           "v16i1 to i16 conversion should have been prelowered."); | 
 |       break; | 
 |     case IceType_v4i1: | 
 |     case IceType_v8i16: | 
 |     case IceType_v16i8: | 
 |     case IceType_v4f32: | 
 |     case IceType_v4i32: { | 
 |       assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType())); | 
 |       assert(isVectorType(DestTy) == isVectorType(Src0->getType())); | 
 |       Variable *T = makeReg(DestTy); | 
 |       _mov(T, Src0); | 
 |       _mov(Dest, T); | 
 |       break; | 
 |     } | 
 |     } | 
 |     break; | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   Type DestTy = Dest->getType(); | 
 |  | 
 |   Variable *Src0 = legalizeToReg(Instr->getSrc(0)); | 
 |   Operand *Src1 = Instr->getSrc(1); | 
 |  | 
 |   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 
 |     const uint32_t Index = Imm->getValue(); | 
 |     Variable *T = makeReg(DestTy); | 
 |     Variable *TSrc0 = makeReg(Src0->getType()); | 
 |  | 
 |     if (isFloatingType(DestTy)) { | 
 |       // We need to make sure the source is in a suitable register. | 
 |       TSrc0->setRegClass(RegARM32::RCARM32_QtoS); | 
 |     } | 
 |  | 
 |     _mov(TSrc0, Src0); | 
 |     _extractelement(T, TSrc0, Index); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   assert(false && "extractelement requires a constant index"); | 
 | } | 
 |  | 
 | namespace { | 
 | // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering | 
 | // (and naming). | 
 | enum { | 
 | #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val, | 
 |   FCMPARM32_TABLE | 
 | #undef X | 
 |       _fcmp_ll_NUM | 
 | }; | 
 |  | 
 | enum { | 
 | #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag, | 
 |   ICEINSTFCMP_TABLE | 
 | #undef X | 
 |       _fcmp_hl_NUM | 
 | }; | 
 |  | 
 | static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM, | 
 |               "Inconsistency between high-level and low-level fcmp tags."); | 
 | #define X(tag, str)                                                            \ | 
 |   static_assert(                                                               \ | 
 |       (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag,                    \ | 
 |       "Inconsistency between high-level and low-level fcmp tag " #tag); | 
 | ICEINSTFCMP_TABLE | 
 | #undef X | 
 |  | 
 | struct { | 
 |   CondARM32::Cond CC0; | 
 |   CondARM32::Cond CC1; | 
 | } TableFcmp[] = { | 
 | #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \ | 
 |   { CondARM32::CC0, CondARM32::CC1 }                                           \ | 
 |   , | 
 |     FCMPARM32_TABLE | 
 | #undef X | 
 | }; | 
 |  | 
 | bool isFloatingPointZero(const Operand *Src) { | 
 |   if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) { | 
 |     return Utils::isPositiveZero(F32->getValue()); | 
 |   } | 
 |  | 
 |   if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) { | 
 |     return Utils::isPositiveZero(F64->getValue()); | 
 |   } | 
 |  | 
 |   return false; | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 
 |   InstFcmp::FCond Condition = Instr->getCondition(); | 
 |   switch (Condition) { | 
 |   case InstFcmp::False: | 
 |     return CondWhenTrue(CondARM32::kNone); | 
  case InstFcmp::True:
    return CondWhenTrue(CondARM32::AL);
 |   default: { | 
 |     Variable *Src0R = legalizeToReg(Instr->getSrc(0)); | 
 |     Operand *Src1 = Instr->getSrc(1); | 
 |     if (isFloatingPointZero(Src1)) { | 
 |       _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); | 
 |     } else { | 
 |       _vcmp(Src0R, legalizeToReg(Src1)); | 
 |     } | 
 |     _vmrs(); | 
 |     assert(Condition < llvm::array_lengthof(TableFcmp)); | 
 |     return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerFcmp(const InstFcmp *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   const Type DestTy = Dest->getType(); | 
 |  | 
 |   if (isVectorType(DestTy)) { | 
 |     if (Instr->getCondition() == InstFcmp::False) { | 
 |       constexpr Type SafeTypeForMovingConstant = IceType_v4i32; | 
 |       auto *T = makeReg(SafeTypeForMovingConstant); | 
 |       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0))); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->getCondition() == InstFcmp::True) { | 
 |       constexpr Type SafeTypeForMovingConstant = IceType_v4i32; | 
 |       auto *T = makeReg(SafeTypeForMovingConstant); | 
 |       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1))); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     Variable *T0; | 
 |     Variable *T1; | 
 |     bool Negate = false; | 
 |     auto *Src0 = legalizeToReg(Instr->getSrc(0)); | 
 |     auto *Src1 = legalizeToReg(Instr->getSrc(1)); | 
 |  | 
 |     switch (Instr->getCondition()) { | 
 |     default: | 
 |       llvm::report_fatal_error("Unhandled fp comparison."); | 
 | #define _Vcnone(Tptr, S0, S1)                                                  \ | 
 |   do {                                                                         \ | 
 |     *(Tptr) = nullptr;                                                         \ | 
 |   } while (0) | 
 | #define _Vceq(Tptr, S0, S1)                                                    \ | 
 |   do {                                                                         \ | 
 |     *(Tptr) = makeReg(DestTy);                                                 \ | 
 |     _vceq(*(Tptr), S0, S1);                                                    \ | 
 |   } while (0) | 
 | #define _Vcge(Tptr, S0, S1)                                                    \ | 
 |   do {                                                                         \ | 
 |     *(Tptr) = makeReg(DestTy);                                                 \ | 
 |     _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \ | 
 |   } while (0) | 
 | #define _Vcgt(Tptr, S0, S1)                                                    \ | 
 |   do {                                                                         \ | 
 |     *(Tptr) = makeReg(DestTy);                                                 \ | 
 |     _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \ | 
 |   } while (0) | 
 | #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \ | 
 |   case InstFcmp::val: {                                                        \ | 
 |     _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1);             \ | 
 |     _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0);             \ | 
 |     Negate = NEG_V;                                                            \ | 
 |   } break; | 
 |       FCMPARM32_TABLE | 
 | #undef X | 
 | #undef _Vcgt | 
 | #undef _Vcge | 
 | #undef _Vceq | 
 | #undef _Vcnone | 
 |     } | 
 |     assert(T0 != nullptr); | 
 |     Variable *T = T0; | 
 |     if (T1 != nullptr) { | 
 |       T = makeReg(DestTy); | 
 |       _vorr(T, T0, T1); | 
 |     } | 
 |  | 
 |     if (Negate) { | 
 |       auto *TNeg = makeReg(DestTy); | 
 |       _vmvn(TNeg, T); | 
 |       T = TNeg; | 
 |     } | 
 |  | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |  | 
 |   Variable *T = makeReg(IceType_i1); | 
 |   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex); | 
 |   Operand *_0 = | 
 |       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |  | 
 |   CondWhenTrue Cond = lowerFcmpCond(Instr); | 
 |  | 
 |   bool RedefineT = false; | 
 |   if (Cond.WhenTrue0 != CondARM32::AL) { | 
 |     _mov(T, _0); | 
 |     RedefineT = true; | 
 |   } | 
 |  | 
 |   if (Cond.WhenTrue0 == CondARM32::kNone) { | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |  | 
 |   if (RedefineT) { | 
 |     _mov_redefined(T, _1, Cond.WhenTrue0); | 
 |   } else { | 
 |     _mov(T, _1, Cond.WhenTrue0); | 
 |   } | 
 |  | 
 |   if (Cond.WhenTrue1 != CondARM32::kNone) { | 
 |     _mov_redefined(T, _1, Cond.WhenTrue1); | 
 |   } | 
 |  | 
 |   _mov(Dest, T); | 
 | } | 
 |  | 
 | TargetARM32::CondWhenTrue | 
 | TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, | 
 |                                 Operand *Src1) { | 
 |   assert(Condition < llvm::array_lengthof(TableIcmp64)); | 
 |  | 
 |   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); | 
 |   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); | 
 |   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); | 
 |   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); | 
 |  | 
 |   if (SrcsLo.hasConstOperand()) { | 
 |     const uint32_t ValueLo = SrcsLo.getConstantValue(); | 
 |     const uint32_t ValueHi = SrcsHi.getConstantValue(); | 
 |     const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo; | 
 |     if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | 
 |         Value == 0) { | 
 |       Variable *T = makeReg(IceType_i32); | 
 |       Variable *Src0LoR = SrcsLo.src0R(this); | 
 |       Variable *Src0HiR = SrcsHi.src0R(this); | 
 |       _orrs(T, Src0LoR, Src0HiR); | 
 |       Context.insert<InstFakeUse>(T); | 
 |       return CondWhenTrue(TableIcmp64[Condition].C1); | 
 |     } | 
 |  | 
 |     Variable *Src0RLo = SrcsLo.src0R(this); | 
 |     Variable *Src0RHi = SrcsHi.src0R(this); | 
 |     Operand *Src1RFLo = SrcsLo.src1RF(this); | 
 |     Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this); | 
 |  | 
 |     const bool UseRsb = | 
 |         TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands(); | 
 |  | 
 |     if (UseRsb) { | 
 |       if (TableIcmp64[Condition].IsSigned) { | 
 |         Variable *T = makeReg(IceType_i32); | 
 |         _rsbs(T, Src0RLo, Src1RFLo); | 
 |         Context.insert<InstFakeUse>(T); | 
 |  | 
 |         T = makeReg(IceType_i32); | 
 |         _rscs(T, Src0RHi, Src1RFHi); | 
 |         // We need to add a FakeUse here because liveness gets mad at us (Def | 
 |         // without Use.) Note that flag-setting instructions are considered to | 
 |         // have side effects and, therefore, are not DCE'ed. | 
 |         Context.insert<InstFakeUse>(T); | 
 |       } else { | 
 |         Variable *T = makeReg(IceType_i32); | 
 |         _rsbs(T, Src0RHi, Src1RFHi); | 
 |         Context.insert<InstFakeUse>(T); | 
 |  | 
 |         T = makeReg(IceType_i32); | 
 |         _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); | 
 |         Context.insert<InstFakeUse>(T); | 
 |       } | 
 |     } else { | 
 |       if (TableIcmp64[Condition].IsSigned) { | 
 |         _cmp(Src0RLo, Src1RFLo); | 
 |         Variable *T = makeReg(IceType_i32); | 
 |         _sbcs(T, Src0RHi, Src1RFHi); | 
 |         Context.insert<InstFakeUse>(T); | 
 |       } else { | 
 |         _cmp(Src0RHi, Src1RFHi); | 
 |         _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | 
 |       } | 
 |     } | 
 |  | 
 |     return CondWhenTrue(TableIcmp64[Condition].C1); | 
 |   } | 
 |  | 
 |   Variable *Src0RLo, *Src0RHi; | 
 |   Operand *Src1RFLo, *Src1RFHi; | 
 |   if (TableIcmp64[Condition].Swapped) { | 
 |     Src0RLo = legalizeToReg(loOperand(Src1)); | 
 |     Src0RHi = legalizeToReg(hiOperand(Src1)); | 
 |     Src1RFLo = legalizeToReg(loOperand(Src0)); | 
 |     Src1RFHi = legalizeToReg(hiOperand(Src0)); | 
 |   } else { | 
 |     Src0RLo = legalizeToReg(loOperand(Src0)); | 
 |     Src0RHi = legalizeToReg(hiOperand(Src0)); | 
 |     Src1RFLo = legalizeToReg(loOperand(Src1)); | 
 |     Src1RFHi = legalizeToReg(hiOperand(Src1)); | 
 |   } | 
 |  | 
 |   // a=icmp cond, b, c ==> | 
 |   // GCC does: | 
 |   //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo | 
 |   //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi | 
 |   //   mov.<C1> t, #1              mov.<C1> t, #1 | 
 |   //   mov.<C2> t, #0              mov.<C2> t, #0 | 
 |   //   mov      a, t               mov      a, t | 
  // where the "cmp.eq b.lo, c.lo" is used for unsigned compares and the
  // "sbcs t1, b.hi, c.hi" is used for signed compares. In some cases, b and c
  // need to be swapped as well.
 |   // | 
 |   // LLVM does: | 
 |   // for EQ and NE: | 
 |   //   eor  t1, b.hi, c.hi | 
  //   eor  t2, b.lo, c.lo
 |   //   orrs t, t1, t2 | 
 |   //   mov.<C> t, #1 | 
 |   //   mov  a, t | 
 |   // | 
 |   // that's nice in that it's just as short but has fewer dependencies for | 
 |   // better ILP at the cost of more registers. | 
 |   // | 
 |   // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 
 |   // unconditional mov #0, two cmps, two conditional mov #1, and one | 
 |   // conditional reg mov. That has few dependencies for good ILP, but is a | 
 |   // longer sequence. | 
 |   // | 
 |   // So, we are going with the GCC version since it's usually better (except | 
 |   // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 
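  // For example (illustrative), "a = icmp ult i64 b, c" with no operand
  // swapping roughly emits:
  //     cmp    b.hi, c.hi
  //     cmpeq  b.lo, c.lo      @ decides the result when the hi words tie
  // and the caller then materializes 0/1 from the returned unsigned-lower
  // (LO) condition.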
 |   if (TableIcmp64[Condition].IsSigned) { | 
 |     Variable *ScratchReg = makeReg(IceType_i32); | 
 |     _cmp(Src0RLo, Src1RFLo); | 
 |     _sbcs(ScratchReg, Src0RHi, Src1RFHi); | 
 |     // ScratchReg isn't going to be used, but we need the side-effect of | 
 |     // setting flags from this operation. | 
 |     Context.insert<InstFakeUse>(ScratchReg); | 
 |   } else { | 
 |     _cmp(Src0RHi, Src1RFHi); | 
 |     _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | 
 |   } | 
 |   return CondWhenTrue(TableIcmp64[Condition].C1); | 
 | } | 
 |  | 
 | TargetARM32::CondWhenTrue | 
 | TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0, | 
 |                                 Operand *Src1) { | 
 |   Int32Operands Srcs(Src0, Src1); | 
 |   if (!Srcs.hasConstOperand()) { | 
 |  | 
 |     Variable *Src0R = Srcs.src0R(this); | 
 |     Operand *Src1RF = Srcs.src1RF(this); | 
 |     _cmp(Src0R, Src1RF); | 
 |     return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 |   } | 
 |  | 
 |   Variable *Src0R = Srcs.src0R(this); | 
 |   const int32_t Value = Srcs.getConstantValue(); | 
 |   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { | 
 |     _tst(Src0R, Src0R); | 
 |     return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 |   } | 
 |  | 
 |   if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() && | 
 |       Srcs.negatedImmediateIsFlexEncodable()) { | 
 |     Operand *Src1F = Srcs.negatedSrc1F(this); | 
 |     _cmn(Src0R, Src1F); | 
 |     return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 |   } | 
 |  | 
 |   Operand *Src1RF = Srcs.src1RF(this); | 
 |   if (!Srcs.swappedOperands()) { | 
 |     _cmp(Src0R, Src1RF); | 
 |   } else { | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _rsbs(T, Src0R, Src1RF); | 
 |     Context.insert<InstFakeUse>(T); | 
 |   } | 
 |   return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 | } | 
 |  | 
 | TargetARM32::CondWhenTrue | 
 | TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, | 
 |                                        Operand *Src1) { | 
 |   Int32Operands Srcs(Src0, Src1); | 
 |   const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 
 |   assert(ShAmt >= 0); | 
 |  | 
 |   if (!Srcs.hasConstOperand()) { | 
 |     Variable *Src0R = makeReg(IceType_i32); | 
 |     Operand *ShAmtImm = shAmtImm(ShAmt); | 
 |     _lsl(Src0R, legalizeToReg(Src0), ShAmtImm); | 
 |  | 
 |     Variable *Src1R = legalizeToReg(Src1); | 
 |     auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R, | 
 |                                               OperandARM32::LSL, ShAmtImm); | 
 |     _cmp(Src0R, Src1F); | 
 |     return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 |   } | 
 |  | 
 |   const int32_t Value = Srcs.getConstantValue(); | 
 |   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { | 
 |     Operand *ShAmtImm = shAmtImm(ShAmt); | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _lsls(T, Srcs.src0R(this), ShAmtImm); | 
 |     Context.insert<InstFakeUse>(T); | 
 |     return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 |   } | 
 |  | 
 |   Variable *ConstR = makeReg(IceType_i32); | 
 |   _mov(ConstR, | 
 |        legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex)); | 
 |   Operand *NonConstF = OperandARM32FlexReg::create( | 
 |       Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL, | 
 |       Ctx->getConstantInt32(ShAmt)); | 
 |  | 
 |   if (Srcs.swappedOperands()) { | 
 |     _cmp(ConstR, NonConstF); | 
 |   } else { | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _rsbs(T, ConstR, NonConstF); | 
 |     Context.insert<InstFakeUse>(T); | 
 |   } | 
 |   return CondWhenTrue(getIcmp32Mapping(Condition)); | 
 | } | 
 |  | 
 | TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { | 
 |   return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0), | 
 |                        Instr->getSrc(1)); | 
 | } | 
 |  | 
 | TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition, | 
 |                                                      Operand *Src0, | 
 |                                                      Operand *Src1) { | 
 |   Src0 = legalizeUndef(Src0); | 
 |   Src1 = legalizeUndef(Src1); | 
 |  | 
 |   // a=icmp cond b, c ==> | 
 |   // GCC does: | 
 |   //   <u/s>xtb tb, b | 
 |   //   <u/s>xtb tc, c | 
 |   //   cmp      tb, tc | 
 |   //   mov.C1   t, #0 | 
 |   //   mov.C2   t, #1 | 
 |   //   mov      a, t | 
 |   // where the unsigned/sign extension is not needed for 32-bit. They also have | 
 |   // special cases for EQ and NE. E.g., for NE: | 
 |   //   <extend to tb, tc> | 
 |   //   subs     t, tb, tc | 
 |   //   movne    t, #1 | 
 |   //   mov      a, t | 
 |   // | 
 |   // LLVM does: | 
 |   //   lsl     tb, b, #<N> | 
 |   //   mov     t, #0 | 
 |   //   cmp     tb, c, lsl #<N> | 
 |   //   mov.<C> t, #1 | 
 |   //   mov     a, t | 
 |   // | 
 |   // the left shift is by 0, 16, or 24, which allows the comparison to focus on | 
 |   // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 
  // the unsigned case, for some reason it does something similar to GCC and
  // emits a uxtb first. It's not clear why that special-casing is needed.
 |   // | 
 |   // We'll go with the LLVM way for now, since it's shorter and has just as few | 
 |   // dependencies. | 
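  // For example (illustrative), "a = icmp slt i8 b, c" with both operands in
  // registers roughly emits:
  //     lsl     tb, b, #24
  //     cmp     tb, c, lsl #24
  // and the caller then materializes 0/1 from the returned signed-less-than
  // (LT) condition.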
 |   switch (Src0->getType()) { | 
 |   default: | 
 |     llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); | 
 |   case IceType_i1: | 
 |   case IceType_i8: | 
 |   case IceType_i16: | 
 |     return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1); | 
 |   case IceType_i32: | 
 |     return lowerInt32IcmpCond(Condition, Src0, Src1); | 
 |   case IceType_i64: | 
 |     return lowerInt64IcmpCond(Condition, Src0, Src1); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerIcmp(const InstIcmp *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   const Type DestTy = Dest->getType(); | 
 |  | 
 |   if (isVectorType(DestTy)) { | 
 |     auto *T = makeReg(DestTy); | 
 |     auto *Src0 = legalizeToReg(Instr->getSrc(0)); | 
 |     auto *Src1 = legalizeToReg(Instr->getSrc(1)); | 
 |     const Type SrcTy = Src0->getType(); | 
 |  | 
 |     bool NeedsShl = false; | 
 |     Type NewTypeAfterShl; | 
 |     SizeT ShAmt; | 
 |     switch (SrcTy) { | 
 |     default: | 
 |       break; | 
 |     case IceType_v16i1: | 
 |       NeedsShl = true; | 
 |       NewTypeAfterShl = IceType_v16i8; | 
 |       ShAmt = 7; | 
 |       break; | 
 |     case IceType_v8i1: | 
 |       NeedsShl = true; | 
 |       NewTypeAfterShl = IceType_v8i16; | 
 |       ShAmt = 15; | 
 |       break; | 
 |     case IceType_v4i1: | 
 |       NeedsShl = true; | 
 |       NewTypeAfterShl = IceType_v4i32; | 
 |       ShAmt = 31; | 
 |       break; | 
 |     } | 
 |  | 
 |     if (NeedsShl) { | 
 |       auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt)); | 
 |       auto *Src0T = makeReg(NewTypeAfterShl); | 
 |       auto *Src0Shl = makeReg(NewTypeAfterShl); | 
 |       _mov(Src0T, Src0); | 
 |       _vshl(Src0Shl, Src0T, Imm); | 
 |       Src0 = Src0Shl; | 
 |  | 
 |       auto *Src1T = makeReg(NewTypeAfterShl); | 
 |       auto *Src1Shl = makeReg(NewTypeAfterShl); | 
 |       _mov(Src1T, Src1); | 
 |       _vshl(Src1Shl, Src1T, Imm); | 
 |       Src1 = Src1Shl; | 
 |     } | 
 |  | 
 |     switch (Instr->getCondition()) { | 
 |     default: | 
 |       llvm::report_fatal_error("Unhandled integer comparison."); | 
 | #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1) | 
 | #define _Vcge(T, S0, S1, Signed)                                               \ | 
 |   _vcge(T, S0, S1)                                                             \ | 
 |       ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned) | 
 | #define _Vcgt(T, S0, S1, Signed)                                               \ | 
 |   _vcgt(T, S0, S1)                                                             \ | 
 |       ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned) | 
 | #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \ | 
 |   case InstIcmp::val: {                                                        \ | 
 |     _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed);      \ | 
 |     if (NEG_V) {                                                               \ | 
 |       auto *TInv = makeReg(DestTy);                                            \ | 
 |       _vmvn(TInv, T);                                                          \ | 
 |       T = TInv;                                                                \ | 
 |     }                                                                          \ | 
 |   } break; | 
 |       ICMPARM32_TABLE | 
 | #undef X | 
 | #undef _Vcgt | 
 | #undef _Vcge | 
 | #undef _Vceq | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |  | 
 |   Operand *_0 = | 
 |       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex); | 
 |   Variable *T = makeReg(IceType_i1); | 
 |  | 
 |   _mov(T, _0); | 
 |   CondWhenTrue Cond = lowerIcmpCond(Instr); | 
 |   _mov_redefined(T, _1, Cond.WhenTrue0); | 
 |   _mov(Dest, T); | 
 |  | 
 |   assert(Cond.WhenTrue1 == CondARM32::kNone); | 
 |  | 
 |   return; | 
 | } | 
 |  | 
 | void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   Type DestTy = Dest->getType(); | 
 |  | 
 |   Variable *Src0 = legalizeToReg(Instr->getSrc(0)); | 
 |   Variable *Src1 = legalizeToReg(Instr->getSrc(1)); | 
 |   Operand *Src2 = Instr->getSrc(2); | 
 |  | 
 |   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) { | 
 |     const uint32_t Index = Imm->getValue(); | 
 |     Variable *T = makeReg(DestTy); | 
 |  | 
 |     if (isFloatingType(DestTy)) { | 
 |       T->setRegClass(RegARM32::RCARM32_QtoS); | 
 |     } | 
 |  | 
 |     _mov(T, Src0); | 
 |     _insertelement(T, Src1, Index); | 
 |     _set_dest_redefined(); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   assert(false && "insertelement requires a constant index"); | 
 | } | 
 |  | 
 | namespace { | 
 | inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 
 |   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 
 |     return Integer->getValue(); | 
 |   return Intrinsics::MemoryOrderInvalid; | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::lowerLoadLinkedStoreExclusive( | 
 |     Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation, | 
 |     CondARM32::Cond Cond) { | 
 |  | 
 |   auto *Retry = Context.insert<InstARM32Label>(this); | 
 |  | 
 |   { // scoping for loop highlighting. | 
 |     Variable *Success = makeReg(IceType_i32); | 
 |     Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty); | 
 |     auto *_0 = Ctx->getConstantZero(IceType_i32); | 
 |  | 
 |     Context.insert<InstFakeDef>(Tmp); | 
 |     Context.insert<InstFakeUse>(Tmp); | 
 |     Variable *AddrR = legalizeToReg(Addr); | 
 |     _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined(); | 
 |     auto *StoreValue = Operation(Tmp); | 
 |     assert(StoreValue->mustHaveReg()); | 
 |     // strex requires Dest to be a register other than Value or Addr. This | 
 |     // restriction is cleanly represented by adding an "early" definition of | 
    // Dest (or a later use of all the sources).
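    // (In the ARM ARM, "strex rd, rt, [rn]" is UNPREDICTABLE when rd is the
    // same register as rt or rn, which is why the early definition is needed.)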
 |     Context.insert<InstFakeDef>(Success); | 
 |     if (Cond != CondARM32::AL) { | 
 |       _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex), | 
 |                      InstARM32::getOppositeCondition(Cond)); | 
 |     } | 
 |     _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond) | 
 |         ->setDestRedefined(); | 
 |     _cmp(Success, _0); | 
 |   } | 
 |  | 
 |   _br(Retry, CondARM32::NE); | 
 | } | 
 |  | 
 | namespace { | 
 | InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest, | 
 |                                 Variable *Src0, Operand *Src1) { | 
 |   InstArithmetic::OpKind Oper; | 
 |   switch (Operation) { | 
 |   default: | 
 |     llvm::report_fatal_error("Unknown AtomicRMW operation"); | 
 |   case Intrinsics::AtomicExchange: | 
 |     llvm::report_fatal_error("Can't handle Atomic xchg operation"); | 
 |   case Intrinsics::AtomicAdd: | 
 |     Oper = InstArithmetic::Add; | 
 |     break; | 
 |   case Intrinsics::AtomicAnd: | 
 |     Oper = InstArithmetic::And; | 
 |     break; | 
 |   case Intrinsics::AtomicSub: | 
 |     Oper = InstArithmetic::Sub; | 
 |     break; | 
 |   case Intrinsics::AtomicOr: | 
 |     Oper = InstArithmetic::Or; | 
 |     break; | 
 |   case Intrinsics::AtomicXor: | 
 |     Oper = InstArithmetic::Xor; | 
 |     break; | 
 |   } | 
 |   return InstArithmetic::create(Func, Oper, Dest, Src0, Src1); | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 
 |                                  Operand *Addr, Operand *Val) { | 
 |   // retry: | 
 |   //     ldrex tmp, [addr] | 
 |   //     mov contents, tmp | 
 |   //     op result, contents, Val | 
 |   //     strex success, result, [addr] | 
 |   //     cmp success, 0 | 
 |   //     jne retry | 
 |   //     fake-use(addr, operand)  @ prevents undesirable clobbering. | 
 |   //     mov dest, contents | 
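  // For example (illustrative), a 32-bit AtomicAdd roughly expands to the
  // loop above bracketed by memory barriers:
  //     dmb
  // retry:
  //     ldrex   contents, [addr]
  //     add     result, contents, val
  //     strex   success, result, [addr]
  //     cmp     success, #0
  //     bne     retry
  //     dmb
  //     mov     dest, contents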
 |   auto DestTy = Dest->getType(); | 
 |  | 
 |   if (DestTy == IceType_i64) { | 
 |     lowerInt64AtomicRMW(Dest, Operation, Addr, Val); | 
 |     return; | 
 |   } | 
 |  | 
 |   Operand *ValRF = nullptr; | 
 |   if (llvm::isa<ConstantInteger32>(Val)) { | 
 |     ValRF = Val; | 
 |   } else { | 
 |     ValRF = legalizeToReg(Val); | 
 |   } | 
 |   auto *ContentsR = makeReg(DestTy); | 
 |   auto *ResultR = makeReg(DestTy); | 
 |  | 
 |   _dmb(); | 
 |   lowerLoadLinkedStoreExclusive( | 
 |       DestTy, Addr, | 
 |       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { | 
 |         lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); | 
 |         if (Operation == Intrinsics::AtomicExchange) { | 
 |           lowerAssign(InstAssign::create(Func, ResultR, ValRF)); | 
 |         } else { | 
 |           lowerArithmetic( | 
 |               createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); | 
 |         } | 
 |         return ResultR; | 
 |       }); | 
 |   _dmb(); | 
 |   if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) { | 
 |     Context.insert<InstFakeUse>(ValR); | 
 |   } | 
 |   // Can't dce ContentsR. | 
 |   Context.insert<InstFakeUse>(ContentsR); | 
 |   lowerAssign(InstAssign::create(Func, Dest, ContentsR)); | 
 | } | 
 |  | 
 | void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, | 
 |                                       Operand *Addr, Operand *Val) { | 
 |   assert(Dest->getType() == IceType_i64); | 
 |  | 
 |   auto *ResultR = makeI64RegPair(); | 
 |  | 
 |   Context.insert<InstFakeDef>(ResultR); | 
 |  | 
 |   Operand *ValRF = nullptr; | 
 |   if (llvm::dyn_cast<ConstantInteger64>(Val)) { | 
 |     ValRF = Val; | 
 |   } else { | 
 |     auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 
 |     ValR64->initHiLo(Func); | 
 |     ValR64->setMustNotHaveReg(); | 
 |     ValR64->getLo()->setMustHaveReg(); | 
 |     ValR64->getHi()->setMustHaveReg(); | 
 |     lowerAssign(InstAssign::create(Func, ValR64, Val)); | 
 |     ValRF = ValR64; | 
 |   } | 
 |  | 
 |   auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 
 |   ContentsR->initHiLo(Func); | 
 |   ContentsR->setMustNotHaveReg(); | 
 |   ContentsR->getLo()->setMustHaveReg(); | 
 |   ContentsR->getHi()->setMustHaveReg(); | 
 |  | 
 |   _dmb(); | 
 |   lowerLoadLinkedStoreExclusive( | 
 |       IceType_i64, Addr, | 
 |       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { | 
 |         lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); | 
 |         Context.insert<InstFakeUse>(Tmp); | 
 |         if (Operation == Intrinsics::AtomicExchange) { | 
 |           lowerAssign(InstAssign::create(Func, ResultR, ValRF)); | 
 |         } else { | 
 |           lowerArithmetic( | 
 |               createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); | 
 |         } | 
 |         Context.insert<InstFakeUse>(ResultR->getHi()); | 
 |         Context.insert<InstFakeDef>(ResultR, ResultR->getLo()) | 
 |             ->setDestRedefined(); | 
 |         return ResultR; | 
 |       }); | 
 |   _dmb(); | 
 |   if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) { | 
 |     Context.insert<InstFakeUse>(ValR64->getLo()); | 
 |     Context.insert<InstFakeUse>(ValR64->getHi()); | 
 |   } | 
 |   lowerAssign(InstAssign::create(Func, Dest, ContentsR)); | 
 | } | 
 |  | 
 | void TargetARM32::postambleCtpop64(const InstCall *Instr) { | 
 |   Operand *Arg0 = Instr->getArg(0); | 
 |   if (isInt32Asserting32Or64(Arg0->getType())) { | 
 |     return; | 
 |   } | 
  // The popcount helpers always return 32-bit values, while the intrinsic's
  // signature matches some 64-bit platforms' native instructions and expects
  // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in case
  // the user doesn't do that in the IR or doesn't toss the bits via truncate.
 |   auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest())); | 
 |   Variable *T = makeReg(IceType_i32); | 
 |   Operand *_0 = | 
 |       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |   _mov(T, _0); | 
 |   _mov(DestHi, T); | 
 | } | 
 |  | 
 | void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void; | 
 |   Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID; | 
 |   switch (ID) { | 
 |   case Intrinsics::AtomicFence: | 
 |   case Intrinsics::AtomicFenceAll: | 
 |     assert(Dest == nullptr); | 
 |     _dmb(); | 
 |     return; | 
 |   case Intrinsics::AtomicIsLockFree: { | 
 |     Operand *ByteSize = Instr->getArg(0); | 
 |     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize); | 
 |     if (CI == nullptr) { | 
 |       // The PNaCl ABI requires the byte size to be a compile-time constant. | 
 |       Func->setError("AtomicIsLockFree byte size should be compile-time const"); | 
 |       return; | 
 |     } | 
 |     static constexpr int32_t NotLockFree = 0; | 
 |     static constexpr int32_t LockFree = 1; | 
 |     int32_t Result = NotLockFree; | 
 |     switch (CI->getValue()) { | 
 |     case 1: | 
 |     case 2: | 
 |     case 4: | 
 |     case 8: | 
 |       Result = LockFree; | 
 |       break; | 
 |     } | 
 |     _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result))); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::AtomicLoad: { | 
 |     assert(isScalarIntegerType(DestTy)); | 
 |     // We require the memory address to be naturally aligned. Given that is the | 
 |     // case, then normal loads are atomic. | 
 |     if (!Intrinsics::isMemoryOrderValid( | 
 |             ID, getConstantMemoryOrder(Instr->getArg(1)))) { | 
 |       Func->setError("Unexpected memory ordering for AtomicLoad"); | 
 |       return; | 
 |     } | 
 |     Variable *T; | 
 |  | 
 |     if (DestTy == IceType_i64) { | 
      // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
 |       // integer atomically. Everything else works with a regular ldr. | 
 |       T = makeI64RegPair(); | 
 |       _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); | 
 |     } else { | 
 |       T = makeReg(DestTy); | 
 |       _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); | 
 |     } | 
 |     _dmb(); | 
 |     lowerAssign(InstAssign::create(Func, Dest, T)); | 
    // Add a fake-use of T to ensure the atomic load is not removed if Dest is
    // unused.
 |     Context.insert<InstFakeUse>(T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::AtomicStore: { | 
 |     // We require the memory address to be naturally aligned. Given that is the | 
    // case, then normal stores are atomic.
 |     if (!Intrinsics::isMemoryOrderValid( | 
 |             ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 
 |       Func->setError("Unexpected memory ordering for AtomicStore"); | 
 |       return; | 
 |     } | 
 |  | 
 |     auto *Value = Instr->getArg(0); | 
 |     if (Value->getType() == IceType_i64) { | 
 |       auto *ValueR = makeI64RegPair(); | 
 |       Context.insert<InstFakeDef>(ValueR); | 
 |       lowerAssign(InstAssign::create(Func, ValueR, Value)); | 
 |       _dmb(); | 
 |       lowerLoadLinkedStoreExclusive( | 
 |           IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) { | 
 |             // The following fake-use prevents the ldrex instruction from being | 
 |             // dead code eliminated. | 
 |             Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp))); | 
 |             Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp))); | 
 |             Context.insert<InstFakeUse>(Tmp); | 
 |             return ValueR; | 
 |           }); | 
 |       Context.insert<InstFakeUse>(ValueR); | 
 |       _dmb(); | 
 |       return; | 
 |     } | 
 |  | 
 |     auto *ValueR = legalizeToReg(Instr->getArg(0)); | 
 |     const auto ValueTy = ValueR->getType(); | 
 |     assert(isScalarIntegerType(ValueTy)); | 
 |     auto *Addr = legalizeToReg(Instr->getArg(1)); | 
 |  | 
    // Non-64-bit stores are atomic as long as the address is aligned. This
    // is PNaCl, so addresses are aligned.
 |     _dmb(); | 
 |     _str(ValueR, formMemoryOperand(Addr, ValueTy)); | 
 |     _dmb(); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::AtomicCmpxchg: { | 
 |     // retry: | 
 |     //     ldrex tmp, [addr] | 
 |     //     cmp tmp, expected | 
 |     //     mov expected, tmp | 
 |     //     strexeq success, new, [addr] | 
 |     //     cmpeq success, #0 | 
 |     //     bne retry | 
 |     //     mov dest, expected | 
 |     assert(isScalarIntegerType(DestTy)); | 
 |     // We require the memory address to be naturally aligned. Given that is the | 
 |     // case, then normal loads are atomic. | 
 |     if (!Intrinsics::isMemoryOrderValid( | 
 |             ID, getConstantMemoryOrder(Instr->getArg(3)), | 
 |             getConstantMemoryOrder(Instr->getArg(4)))) { | 
 |       Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (DestTy == IceType_i64) { | 
 |       Variable *LoadedValue = nullptr; | 
 |  | 
 |       auto *New = makeI64RegPair(); | 
 |       Context.insert<InstFakeDef>(New); | 
 |       lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); | 
 |  | 
 |       auto *Expected = makeI64RegPair(); | 
 |       Context.insert<InstFakeDef>(Expected); | 
 |       lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); | 
 |  | 
 |       _dmb(); | 
 |       lowerLoadLinkedStoreExclusive( | 
 |           DestTy, Instr->getArg(0), | 
 |           [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) { | 
 |             auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected)); | 
 |             auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected)); | 
 |             auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp)); | 
 |             auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp)); | 
 |             _cmp(TmpLoR, ExpectedLoR); | 
 |             _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ); | 
 |             LoadedValue = Tmp; | 
 |             return New; | 
 |           }, | 
 |           CondARM32::EQ); | 
 |       _dmb(); | 
 |  | 
 |       Context.insert<InstFakeUse>(LoadedValue); | 
 |       lowerAssign(InstAssign::create(Func, Dest, LoadedValue)); | 
 |       // The fake-use Expected prevents the assignments to Expected (above) | 
 |       // from being removed if Dest is not used. | 
 |       Context.insert<InstFakeUse>(Expected); | 
 |       // New needs to be alive here, or its live range will end in the | 
 |       // strex instruction. | 
 |       Context.insert<InstFakeUse>(New); | 
 |       return; | 
 |     } | 
 |  | 
 |     auto *New = legalizeToReg(Instr->getArg(2)); | 
 |     auto *Expected = legalizeToReg(Instr->getArg(1)); | 
 |     Variable *LoadedValue = nullptr; | 
 |  | 
 |     _dmb(); | 
 |     lowerLoadLinkedStoreExclusive( | 
 |         DestTy, Instr->getArg(0), | 
 |         [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) { | 
 |           lowerIcmpCond(InstIcmp::Eq, Tmp, Expected); | 
 |           LoadedValue = Tmp; | 
 |           return New; | 
 |         }, | 
 |         CondARM32::EQ); | 
 |     _dmb(); | 
 |  | 
 |     lowerAssign(InstAssign::create(Func, Dest, LoadedValue)); | 
 |     Context.insert<InstFakeUse>(Expected); | 
 |     Context.insert<InstFakeUse>(New); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::AtomicRMW: { | 
 |     if (!Intrinsics::isMemoryOrderValid( | 
 |             ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 
 |       Func->setError("Unexpected memory ordering for AtomicRMW"); | 
 |       return; | 
 |     } | 
 |     lowerAtomicRMW( | 
 |         Dest, static_cast<uint32_t>( | 
 |                   llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), | 
 |         Instr->getArg(1), Instr->getArg(2)); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Bswap: { | 
 |     Operand *Val = Instr->getArg(0); | 
 |     Type Ty = Val->getType(); | 
 |     if (Ty == IceType_i64) { | 
 |       Val = legalizeUndef(Val); | 
 |       Variable *Val_Lo = legalizeToReg(loOperand(Val)); | 
 |       Variable *Val_Hi = legalizeToReg(hiOperand(Val)); | 
 |       Variable *T_Lo = makeReg(IceType_i32); | 
 |       Variable *T_Hi = makeReg(IceType_i32); | 
 |       auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
 |       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
 |       _rev(T_Lo, Val_Lo); | 
 |       _rev(T_Hi, Val_Hi); | 
 |       _mov(DestLo, T_Hi); | 
 |       _mov(DestHi, T_Lo); | 
 |     } else { | 
 |       assert(Ty == IceType_i32 || Ty == IceType_i16); | 
 |       Variable *ValR = legalizeToReg(Val); | 
 |       Variable *T = makeReg(Ty); | 
 |       _rev(T, ValR); | 
 |       if (Val->getType() == IceType_i16) { | 
 |         Operand *_16 = shAmtImm(16); | 
 |         _lsr(T, T, _16); | 
 |       } | 
 |       _mov(Dest, T); | 
 |     } | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Ctpop: { | 
 |     llvm::report_fatal_error("Ctpop should have been prelowered."); | 
 |   } | 
 |   case Intrinsics::Ctlz: { | 
 |     // The "is zero undef" parameter is ignored and we always return a | 
 |     // well-defined value. | 
 |     Operand *Val = Instr->getArg(0); | 
 |     Variable *ValLoR; | 
 |     Variable *ValHiR = nullptr; | 
 |     if (Val->getType() == IceType_i64) { | 
 |       Val = legalizeUndef(Val); | 
 |       ValLoR = legalizeToReg(loOperand(Val)); | 
 |       ValHiR = legalizeToReg(hiOperand(Val)); | 
 |     } else { | 
 |       ValLoR = legalizeToReg(Val); | 
 |     } | 
 |     lowerCLZ(Dest, ValLoR, ValHiR); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Cttz: { | 
 |     // Essentially like Clz, but reverse the bits first. | 
 |     Operand *Val = Instr->getArg(0); | 
 |     Variable *ValLoR; | 
 |     Variable *ValHiR = nullptr; | 
 |     if (Val->getType() == IceType_i64) { | 
 |       Val = legalizeUndef(Val); | 
 |       ValLoR = legalizeToReg(loOperand(Val)); | 
 |       ValHiR = legalizeToReg(hiOperand(Val)); | 
 |       Variable *TLo = makeReg(IceType_i32); | 
 |       Variable *THi = makeReg(IceType_i32); | 
 |       _rbit(TLo, ValLoR); | 
 |       _rbit(THi, ValHiR); | 
 |       ValLoR = THi; | 
 |       ValHiR = TLo; | 
 |     } else { | 
 |       ValLoR = legalizeToReg(Val); | 
 |       Variable *T = makeReg(IceType_i32); | 
 |       _rbit(T, ValLoR); | 
 |       ValLoR = T; | 
 |     } | 
 |     lowerCLZ(Dest, ValLoR, ValHiR); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Fabs: { | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vabs(T, legalizeToReg(Instr->getArg(0))); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Longjmp: { | 
 |     llvm::report_fatal_error("longjmp should have been prelowered."); | 
 |   } | 
 |   case Intrinsics::Memcpy: { | 
 |     llvm::report_fatal_error("memcpy should have been prelowered."); | 
 |   } | 
 |   case Intrinsics::Memmove: { | 
 |     llvm::report_fatal_error("memmove should have been prelowered."); | 
 |   } | 
 |   case Intrinsics::Memset: { | 
    llvm::report_fatal_error("memset should have been prelowered.");
 |   } | 
 |   case Intrinsics::NaClReadTP: { | 
 |     if (SandboxingType != ST_NaCl) { | 
 |       llvm::report_fatal_error("nacl-read-tp should have been prelowered."); | 
 |     } | 
 |     Variable *TP = legalizeToReg(OperandARM32Mem::create( | 
 |         Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), | 
 |         llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); | 
 |     _mov(Dest, TP); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Setjmp: { | 
 |     llvm::report_fatal_error("setjmp should have been prelowered."); | 
 |   } | 
 |   case Intrinsics::Sqrt: { | 
 |     assert(isScalarFloatingType(Dest->getType()) || | 
 |            getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl); | 
 |     Variable *Src = legalizeToReg(Instr->getArg(0)); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vsqrt(T, Src); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Stacksave: { | 
 |     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 
 |     _mov(Dest, SP); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Stackrestore: { | 
 |     Variable *Val = legalizeToReg(Instr->getArg(0)); | 
 |     Sandboxer(this).reset_sp(Val); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Trap: | 
 |     _trap(); | 
 |     return; | 
 |   case Intrinsics::AddSaturateSigned: | 
 |   case Intrinsics::AddSaturateUnsigned: { | 
 |     bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned); | 
 |     Variable *Src0 = legalizeToReg(Instr->getArg(0)); | 
 |     Variable *Src1 = legalizeToReg(Instr->getArg(1)); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vqadd(T, Src0, Src1, Unsigned); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::LoadSubVector: { | 
 |     assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) && | 
 |            "LoadSubVector second argument must be a constant"); | 
 |     Variable *Dest = Instr->getDest(); | 
 |     Type Ty = Dest->getType(); | 
 |     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1)); | 
 |     Operand *Addr = Instr->getArg(0); | 
 |     OperandARM32Mem *Src = formMemoryOperand(Addr, Ty); | 
 |     doMockBoundsCheck(Src); | 
 |  | 
 |     if (Dest->isRematerializable()) { | 
 |       Context.insert<InstFakeDef>(Dest); | 
 |       return; | 
 |     } | 
 |  | 
 |     auto *T = makeReg(Ty); | 
 |     switch (SubVectorSize->getValue()) { | 
 |     case 4: | 
 |       _vldr1d(T, Src); | 
 |       break; | 
 |     case 8: | 
 |       _vldr1q(T, Src); | 
 |       break; | 
 |     default: | 
 |       Func->setError("Unexpected size for LoadSubVector"); | 
 |       return; | 
 |     } | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::StoreSubVector: { | 
 |     assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) && | 
 |            "StoreSubVector third argument must be a constant"); | 
 |     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2)); | 
 |     Variable *Value = legalizeToReg(Instr->getArg(0)); | 
 |     Operand *Addr = Instr->getArg(1); | 
 |     OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 
 |     doMockBoundsCheck(NewAddr); | 
 |  | 
 |     Value = legalizeToReg(Value); | 
 |  | 
 |     switch (SubVectorSize->getValue()) { | 
 |     case 4: | 
 |       _vstr1d(Value, NewAddr); | 
 |       break; | 
 |     case 8: | 
 |       _vstr1q(Value, NewAddr); | 
 |       break; | 
 |     default: | 
 |       Func->setError("Unexpected size for StoreSubVector"); | 
 |       return; | 
 |     } | 
 |     return; | 
 |   } | 
 |   case Intrinsics::MultiplyAddPairs: { | 
 |     Variable *Src0 = legalizeToReg(Instr->getArg(0)); | 
 |     Variable *Src1 = legalizeToReg(Instr->getArg(1)); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vmlap(T, Src0, Src1); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::MultiplyHighSigned: | 
 |   case Intrinsics::MultiplyHighUnsigned: { | 
 |     bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned); | 
 |     Variable *Src0 = legalizeToReg(Instr->getArg(0)); | 
 |     Variable *Src1 = legalizeToReg(Instr->getArg(1)); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vmulh(T, Src0, Src1, Unsigned); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Nearbyint: { | 
 |     UnimplementedLoweringError(this, Instr); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::Round: { | 
 |     UnimplementedLoweringError(this, Instr); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::SignMask: { | 
 |     UnimplementedLoweringError(this, Instr); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::SubtractSaturateSigned: | 
 |   case Intrinsics::SubtractSaturateUnsigned: { | 
 |     bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned); | 
 |     Variable *Src0 = legalizeToReg(Instr->getArg(0)); | 
 |     Variable *Src1 = legalizeToReg(Instr->getArg(1)); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vqsub(T, Src0, Src1, Unsigned); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   case Intrinsics::VectorPackSigned: | 
 |   case Intrinsics::VectorPackUnsigned: { | 
 |     bool Unsigned = (ID == Intrinsics::VectorPackUnsigned); | 
 |     bool Saturating = true; | 
 |     Variable *Src0 = legalizeToReg(Instr->getArg(0)); | 
 |     Variable *Src1 = legalizeToReg(Instr->getArg(1)); | 
 |     Variable *T = makeReg(DestTy); | 
 |     _vqmovn2(T, Src0, Src1, Unsigned, Saturating); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |   default: // UnknownIntrinsic | 
 |     Func->setError("Unexpected intrinsic"); | 
 |     return; | 
 |   } | 
 |   return; | 
 | } | 
 |  | 
 | void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { | 
 |   Type Ty = Dest->getType(); | 
 |   assert(Ty == IceType_i32 || Ty == IceType_i64); | 
 |   Variable *T = makeReg(IceType_i32); | 
 |   _clz(T, ValLoR); | 
 |   if (Ty == IceType_i64) { | 
 |     auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
 |     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
 |     Operand *Zero = | 
 |         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 
 |     Operand *ThirtyTwo = | 
 |         legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 
 |     _cmp(ValHiR, Zero); | 
 |     Variable *T2 = makeReg(IceType_i32); | 
 |     _add(T2, T, ThirtyTwo); | 
 |     _clz(T2, ValHiR, CondARM32::NE); | 
 |     // T2 is actually a source as well when the predicate is not AL (since it | 
 |     // may leave T2 alone). We use _set_dest_redefined to prolong the liveness | 
 |     // of T2 as if it was used as a source. | 
 |     _set_dest_redefined(); | 
 |     _mov(DestLo, T2); | 
 |     Variable *T3 = makeReg(Zero->getType()); | 
 |     _mov(T3, Zero); | 
 |     _mov(DestHi, T3); | 
 |     return; | 
 |   } | 
 |   _mov(Dest, T); | 
 |   return; | 
 | } | 
 |  | 
 | void TargetARM32::lowerLoad(const InstLoad *Load) { | 
 |   // A Load instruction can be treated the same as an Assign instruction, after | 
 |   // the source operand is transformed into an OperandARM32Mem operand. | 
 |   Type Ty = Load->getDest()->getType(); | 
 |   Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 
 |   Variable *DestLoad = Load->getDest(); | 
 |  | 
  // TODO(jvoung): handle folding opportunities. Sign and zero extension can
 |   // be folded into a load. | 
 |   auto *Assign = InstAssign::create(Func, DestLoad, Src0); | 
 |   lowerAssign(Assign); | 
 | } | 
 |  | 
 | namespace { | 
 | void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset, | 
 |                     const Variable *OffsetReg, int16_t OffsetRegShAmt, | 
 |                     const Inst *Reason) { | 
 |   if (!BuildDefs::dump()) | 
 |     return; | 
 |   if (!Func->isVerbose(IceV_AddrOpt)) | 
 |     return; | 
 |   OstreamLocker _(Func->getContext()); | 
 |   Ostream &Str = Func->getContext()->getStrDump(); | 
 |   Str << "Instruction: "; | 
 |   Reason->dumpDecorated(Func); | 
 |   Str << "  results in Base="; | 
 |   if (Base) | 
 |     Base->dump(Func); | 
 |   else | 
 |     Str << "<null>"; | 
 |   Str << ", OffsetReg="; | 
 |   if (OffsetReg) | 
 |     OffsetReg->dump(Func); | 
 |   else | 
 |     Str << "<null>"; | 
 |   Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n"; | 
 | } | 
 |  | 
 | bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var, | 
 |                  int32_t *Offset, const Inst **Reason) { | 
 |   // Var originates from Var=SrcVar ==> set Var:=SrcVar | 
 |   if (*Var == nullptr) | 
 |     return false; | 
 |   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var); | 
 |   if (!VarAssign) | 
 |     return false; | 
 |   assert(!VMetadata->isMultiDef(*Var)); | 
 |   if (!llvm::isa<InstAssign>(VarAssign)) | 
 |     return false; | 
 |  | 
 |   Operand *SrcOp = VarAssign->getSrc(0); | 
 |   bool Optimized = false; | 
 |   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { | 
 |     if (!VMetadata->isMultiDef(SrcVar) || | 
 |         // TODO: ensure SrcVar stays single-BB | 
 |         false) { | 
 |       Optimized = true; | 
 |       *Var = SrcVar; | 
 |     } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) { | 
      const int32_t MoreOffset = Const->getValue();
      if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
        return false;
      *Var = nullptr;
      *Offset += MoreOffset;
      Optimized = true;
 |     } | 
 |   } | 
 |  | 
 |   if (Optimized) { | 
 |     *Reason = VarAssign; | 
 |   } | 
 |  | 
 |   return Optimized; | 
 | } | 
 |  | 
 | bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) { | 
 |   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 
 |     switch (Arith->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::Add: | 
 |     case InstArithmetic::Sub: | 
 |       *Kind = Arith->getOp(); | 
 |       return true; | 
 |     } | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base, | 
 |                             Variable **OffsetReg, int32_t OffsetRegShamt, | 
 |                             const Inst **Reason) { | 
 |   // OffsetReg==nullptr && Base is Base=Var1+Var2 ==> | 
 |   //   set Base=Var1, OffsetReg=Var2, Shift=0 | 
 |   if (*Base == nullptr) | 
 |     return false; | 
 |   if (*OffsetReg != nullptr) | 
 |     return false; | 
 |   (void)OffsetRegShamt; | 
 |   assert(OffsetRegShamt == 0); | 
 |   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base); | 
 |   if (BaseInst == nullptr) | 
 |     return false; | 
 |   assert(!VMetadata->isMultiDef(*Base)); | 
 |   if (BaseInst->getSrcSize() < 2) | 
 |     return false; | 
 |   auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0)); | 
 |   if (!Var1) | 
 |     return false; | 
 |   if (VMetadata->isMultiDef(Var1)) | 
 |     return false; | 
 |   auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1)); | 
 |   if (!Var2) | 
 |     return false; | 
 |   if (VMetadata->isMultiDef(Var2)) | 
 |     return false; | 
 |   InstArithmetic::OpKind _; | 
 |   if (!isAddOrSub(BaseInst, &_) || | 
 |       // TODO: ensure Var1 and Var2 stay single-BB | 
 |       false) | 
 |     return false; | 
 |   *Base = Var1; | 
 |   *OffsetReg = Var2; | 
 |   // OffsetRegShamt is already 0. | 
 |   *Reason = BaseInst; | 
 |   return true; | 
 | } | 
 |  | 
 | bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata, | 
 |                            Variable **OffsetReg, OperandARM32::ShiftKind *Kind, | 
 |                            int32_t *OffsetRegShamt, const Inst **Reason) { | 
  // OffsetReg is OffsetReg=Var*Const or Var/Const (Const a power of 2) ==>
  //   OffsetReg=Var, Shift+=log2(Const), ShiftKind=LSL (LSR for the divide)
  // OffsetReg is OffsetReg=Var<<Const ==>
  //   OffsetReg=Var, Shift+=Const, ShiftKind=LSL
  // OffsetReg is OffsetReg=Var>>Const ==>
  //   OffsetReg=Var, Shift+=Const, ShiftKind=LSR (ASR for arithmetic shifts)
  // in each case only if the accumulated shift stays <= 31 and the shift kind
  // does not change.
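  //
  // For example (illustrative), if the index register was produced by
  //   t = a << 2
  // then the index is rewritten so the eventual memory operand can become
  //   [base, a, LSL #2]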
 |   OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift; | 
 |   if (*OffsetReg == nullptr) | 
 |     return false; | 
 |   auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg); | 
 |   if (IndexInst == nullptr) | 
 |     return false; | 
 |   assert(!VMetadata->isMultiDef(*OffsetReg)); | 
 |   if (IndexInst->getSrcSize() < 2) | 
 |     return false; | 
 |   auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst); | 
 |   if (ArithInst == nullptr) | 
 |     return false; | 
 |   auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0)); | 
 |   if (Var == nullptr) | 
 |     return false; | 
 |   auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1)); | 
 |   if (Const == nullptr) { | 
 |     assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0))); | 
 |     return false; | 
 |   } | 
 |   if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32) | 
 |     return false; | 
 |  | 
 |   uint32_t NewShamt = -1; | 
 |   switch (ArithInst->getOp()) { | 
 |   default: | 
 |     return false; | 
 |   case InstArithmetic::Shl: { | 
 |     NewShiftKind = OperandARM32::LSL; | 
 |     NewShamt = Const->getValue(); | 
 |     if (NewShamt > 31) | 
 |       return false; | 
 |   } break; | 
 |   case InstArithmetic::Lshr: { | 
 |     NewShiftKind = OperandARM32::LSR; | 
 |     NewShamt = Const->getValue(); | 
 |     if (NewShamt > 31) | 
 |       return false; | 
 |   } break; | 
 |   case InstArithmetic::Ashr: { | 
 |     NewShiftKind = OperandARM32::ASR; | 
 |     NewShamt = Const->getValue(); | 
 |     if (NewShamt > 31) | 
 |       return false; | 
 |   } break; | 
 |   case InstArithmetic::Udiv: | 
 |   case InstArithmetic::Mul: { | 
 |     const uint32_t UnsignedConst = Const->getValue(); | 
 |     NewShamt = llvm::findFirstSet(UnsignedConst); | 
 |     if (NewShamt != llvm::findLastSet(UnsignedConst)) { | 
 |       // First bit set is not the same as the last bit set, so Const is not | 
 |       // a power of 2. | 
 |       return false; | 
 |     } | 
 |     NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv | 
 |                        ? OperandARM32::LSR | 
 |                        : OperandARM32::LSL; | 
 |   } break; | 
 |   } | 
  // Allowed "transitions":
  //   kNoShift -> * iff NewShamt <= 31
  //   LSL -> LSL    iff NewShamt + OffsetRegShamt <= 31
  //   LSR -> LSR    iff NewShamt + OffsetRegShamt <= 31
  //   ASR -> ASR    iff NewShamt + OffsetRegShamt <= 31
 |   if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) { | 
 |     return false; | 
 |   } | 
 |   const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt; | 
 |   if (NewOffsetRegShamt > 31) | 
 |     return false; | 
 |   *OffsetReg = Var; | 
 |   *OffsetRegShamt = NewOffsetRegShamt; | 
 |   *Kind = NewShiftKind; | 
 |   *Reason = IndexInst; | 
 |   return true; | 
 | } | 
 |  | 
 | bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base, | 
 |                      int32_t *Offset, const Inst **Reason) { | 
 |   // Base is Base=Var+Const || Base is Base=Const+Var ==> | 
 |   //   set Base=Var, Offset+=Const | 
 |   // Base is Base=Var-Const ==> | 
 |   //   set Base=Var, Offset-=Const | 
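  //
  // For example (illustrative), if Base was produced by
  //   t = a + 16
  // the rewrite yields Base=a and Offset+=16, so a later memory operand can
  // become [a, #16].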
 |   if (*Base == nullptr) | 
 |     return false; | 
 |   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base); | 
 |   if (BaseInst == nullptr) { | 
 |     return false; | 
 |   } | 
 |   assert(!VMetadata->isMultiDef(*Base)); | 
 |  | 
 |   auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst); | 
 |   if (ArithInst == nullptr) | 
 |     return false; | 
 |   InstArithmetic::OpKind Kind; | 
 |   if (!isAddOrSub(ArithInst, &Kind)) | 
 |     return false; | 
 |   bool IsAdd = Kind == InstArithmetic::Add; | 
 |   Operand *Src0 = ArithInst->getSrc(0); | 
 |   Operand *Src1 = ArithInst->getSrc(1); | 
 |   auto *Var0 = llvm::dyn_cast<Variable>(Src0); | 
 |   auto *Var1 = llvm::dyn_cast<Variable>(Src1); | 
 |   auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0); | 
 |   auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1); | 
 |   Variable *NewBase = nullptr; | 
 |   int32_t NewOffset = *Offset; | 
 |  | 
 |   if (Var0 == nullptr && Const0 == nullptr) { | 
 |     assert(llvm::isa<ConstantRelocatable>(Src0)); | 
 |     return false; | 
 |   } | 
 |  | 
 |   if (Var1 == nullptr && Const1 == nullptr) { | 
 |     assert(llvm::isa<ConstantRelocatable>(Src1)); | 
 |     return false; | 
 |   } | 
 |  | 
 |   if (Var0 && Var1) | 
 |     // TODO(jpp): merge base/index splitting into here. | 
 |     return false; | 
 |   if (!IsAdd && Var1) | 
 |     return false; | 
 |   if (Var0) | 
 |     NewBase = Var0; | 
 |   else if (Var1) | 
 |     NewBase = Var1; | 
 |   // Compute the updated constant offset. | 
 |   if (Const0) { | 
 |     int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue(); | 
 |     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) | 
 |       return false; | 
 |     NewOffset += MoreOffset; | 
 |   } | 
 |   if (Const1) { | 
 |     int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue(); | 
 |     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) | 
 |       return false; | 
 |     NewOffset += MoreOffset; | 
 |   } | 
 |  | 
 |   // Update the computed address parameters once we are sure optimization | 
 |   // is valid. | 
 |   *Base = NewBase; | 
 |   *Offset = NewOffset; | 
 |   *Reason = BaseInst; | 
 |   return true; | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func, | 
 |                                                  const Inst *LdSt, | 
 |                                                  Operand *Base) { | 
 |   assert(Base != nullptr); | 
 |   int32_t OffsetImm = 0; | 
 |   Variable *OffsetReg = nullptr; | 
 |   int32_t OffsetRegShamt = 0; | 
 |   OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift; | 
 |  | 
 |   Func->resetCurrentNode(); | 
 |   if (Func->isVerbose(IceV_AddrOpt)) { | 
 |     OstreamLocker _(Func->getContext()); | 
 |     Ostream &Str = Func->getContext()->getStrDump(); | 
 |     Str << "\nAddress mode formation:\t"; | 
 |     LdSt->dumpDecorated(Func); | 
 |   } | 
 |  | 
 |   if (isVectorType(Ty)) | 
    // Vector loads and stores do not allow offsets and only support the
    // "[reg]" addressing mode (the other supported modes are write-back
    // forms).
 |     return nullptr; | 
 |  | 
 |   auto *BaseVar = llvm::dyn_cast<Variable>(Base); | 
 |   if (BaseVar == nullptr) | 
 |     return nullptr; | 
 |  | 
 |   (void)MemTraitsSize; | 
 |   assert(Ty < MemTraitsSize); | 
 |   auto *TypeTraits = &MemTraits[Ty]; | 
 |   const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex; | 
 |   const bool CanHaveShiftedIndex = | 
 |       !NeedSandboxing && TypeTraits->CanHaveShiftedIndex; | 
 |   const bool CanHaveImm = TypeTraits->CanHaveImm; | 
 |   const int32_t ValidImmMask = TypeTraits->ValidImmMask; | 
 |   (void)ValidImmMask; | 
 |   assert(!CanHaveImm || ValidImmMask >= 0); | 
 |  | 
 |   const VariablesMetadata *VMetadata = Func->getVMetadata(); | 
 |   const Inst *Reason = nullptr; | 
 |  | 
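  // The matchers below are applied repeatedly until none of them makes
  // further progress. For example (illustrative), given
  //   t0 = b << 2
  //   t1 = a + t0
  //   v  = load t1
  // successive matches can fold the address computation into a single use of
  //   [a, b, LSL #2]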
 |   do { | 
 |     if (Reason != nullptr) { | 
 |       dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt, | 
 |                      Reason); | 
 |       Reason = nullptr; | 
 |     } | 
 |  | 
 |     if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) { | 
 |       continue; | 
 |     } | 
 |  | 
 |     if (CanHaveIndex && | 
 |         matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) { | 
 |       continue; | 
 |     } | 
 |  | 
 |     if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg, | 
 |                                                OffsetRegShamt, &Reason)) { | 
 |       continue; | 
 |     } | 
 |  | 
 |     if (CanHaveShiftedIndex) { | 
 |       if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind, | 
 |                                 &OffsetRegShamt, &Reason)) { | 
 |         continue; | 
 |       } | 
 |  | 
 |       if ((OffsetRegShamt == 0) && | 
 |           matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind, | 
 |                                 &OffsetRegShamt, &Reason)) { | 
 |         std::swap(BaseVar, OffsetReg); | 
 |         continue; | 
 |       } | 
 |     } | 
 |  | 
 |     if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) { | 
 |       continue; | 
 |     } | 
 |   } while (Reason); | 
 |  | 
 |   if (BaseVar == nullptr) { | 
 |     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to | 
 |     // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}]. | 
 |     // Instead of a zeroed BaseReg, we initialize it with OffsetImm: | 
 |     // | 
 |     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] -> | 
 |     //     mov BaseReg, #OffsetImm | 
 |     //     use of [BaseReg, OffsetReg{, LSL Shamt}] | 
 |     // | 
 |     const Type PointerType = getPointerType(); | 
 |     BaseVar = makeReg(PointerType); | 
 |     Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm)); | 
 |     OffsetImm = 0; | 
 |   } else if (OffsetImm != 0) { | 
    // ARM ldr/str instructions accept only a limited range of immediate
    // offsets. The formation loop above built up OffsetImm without checking
    // encodability, so we ensure the generated offset is legal here.
 |     const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm; | 
 |     const InstArithmetic::OpKind Op = | 
 |         OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub; | 
 |  | 
 |     if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) || | 
 |         OffsetReg != nullptr) { | 
 |       if (OffsetReg == nullptr) { | 
        // We formed a [Base, #const] addressing mode whose immediate is not
        // encodable in ARM, and there is no offset register. Forming an
        // address mode now would just reproduce what we already have:
        //
        // [Base, #const] -> add T, Base, #const
        //                   use of [T]
        //
        // So we just bite the bullet here and don't form any address mode.
 |         return nullptr; | 
 |       } | 
      // We formed [Base, Offset {, LSL amount}, #const]. Oops. Legalize it to
 |       // | 
 |       // [Base, Offset, {LSL amount}, #const] -> | 
 |       //      add T, Base, #const | 
 |       //      use of [T, Offset {, LSL amount}] | 
 |       const Type PointerType = getPointerType(); | 
 |       Variable *T = makeReg(PointerType); | 
 |       Context.insert<InstArithmetic>(Op, T, BaseVar, | 
 |                                      Ctx->getConstantInt32(PositiveOffset)); | 
 |       BaseVar = T; | 
 |       OffsetImm = 0; | 
 |     } | 
 |   } | 
 |  | 
 |   assert(BaseVar != nullptr); | 
 |   assert(OffsetImm == 0 || OffsetReg == nullptr); | 
 |   assert(OffsetReg == nullptr || CanHaveIndex); | 
 |   assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm | 
 |                        : (ValidImmMask & OffsetImm) == OffsetImm); | 
 |  | 
 |   if (OffsetReg != nullptr) { | 
 |     Variable *OffsetR = makeReg(getPointerType()); | 
 |     Context.insert<InstAssign>(OffsetR, OffsetReg); | 
 |     return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind, | 
 |                                    OffsetRegShamt); | 
 |   } | 
 |  | 
 |   return OperandARM32Mem::create( | 
 |       Func, Ty, BaseVar, | 
 |       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm))); | 
 | } | 
 |  | 
 | void TargetARM32::doAddressOptLoad() { | 
 |   Inst *Instr = iteratorToInst(Context.getCur()); | 
 |   assert(llvm::isa<InstLoad>(Instr)); | 
 |   Variable *Dest = Instr->getDest(); | 
 |   Operand *Addr = Instr->getSrc(0); | 
 |   if (OperandARM32Mem *Mem = | 
 |           formAddressingMode(Dest->getType(), Func, Instr, Addr)) { | 
 |     Instr->setDeleted(); | 
 |     Context.insert<InstLoad>(Dest, Mem); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::randomlyInsertNop(float Probability, | 
 |                                     RandomNumberGenerator &RNG) { | 
 |   RandomNumberGeneratorWrapper RNGW(RNG); | 
 |   if (RNGW.getTrueWithProbability(Probability)) { | 
 |     _nop(); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) { | 
 |   Func->setError("Phi found in regular instruction list"); | 
 | } | 
 |  | 
 | void TargetARM32::lowerRet(const InstRet *Instr) { | 
 |   Variable *Reg = nullptr; | 
 |   if (Instr->hasRetValue()) { | 
 |     Operand *Src0 = Instr->getRetValue(); | 
 |     Type Ty = Src0->getType(); | 
 |     if (Ty == IceType_i64) { | 
 |       Src0 = legalizeUndef(Src0); | 
 |       Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0); | 
 |       Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1); | 
 |       Reg = R0; | 
 |       Context.insert<InstFakeUse>(R1); | 
 |     } else if (Ty == IceType_f32) { | 
 |       Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0); | 
 |       Reg = S0; | 
 |     } else if (Ty == IceType_f64) { | 
 |       Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); | 
 |       Reg = D0; | 
 |     } else if (isVectorType(Src0->getType())) { | 
 |       Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); | 
 |       Reg = Q0; | 
 |     } else { | 
 |       Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); | 
 |       Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0); | 
 |       _mov(Reg, Src0F, CondARM32::AL); | 
 |     } | 
 |   } | 
 |   // Add a ret instruction even if sandboxing is enabled, because addEpilog | 
 |   // explicitly looks for a ret instruction as a marker for where to insert the | 
 |   // frame removal instructions. addEpilog is responsible for restoring the | 
 |   // "lr" register as needed prior to this ret instruction. | 
 |   _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); | 
 |  | 
 |   // Add a fake use of sp to make sure sp stays alive for the entire function. | 
 |   // Otherwise post-call sp adjustments get dead-code eliminated. | 
 |   // TODO: Are there more places where the fake use should be inserted? E.g. | 
 |   // "void f(int n){while(1) g(n);}" may not have a ret instruction. | 
 |   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 
 |   Context.insert<InstFakeUse>(SP); | 
 | } | 
 |  | 
 | void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) { | 
 |   auto *Dest = Instr->getDest(); | 
 |   const Type DestTy = Dest->getType(); | 
 |  | 
 |   auto *T = makeReg(DestTy); | 
 |   auto *Src0 = Instr->getSrc(0); | 
 |   auto *Src1 = Instr->getSrc(1); | 
 |   const SizeT NumElements = typeNumElements(DestTy); | 
 |   const Type ElementType = typeElementType(DestTy); | 
 |  | 
 |   bool Replicate = true; | 
 |   for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) { | 
 |     if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) { | 
 |       Replicate = false; | 
 |     } | 
 |   } | 
 |  | 
 |   if (Replicate) { | 
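    // Every index selects the same lane, so the shuffle is a broadcast: a
    // single vdup of that lane produces the result (illustratively, indexes
    // <1, 1, 1, 1> become a vdup of Src0's lane 1).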
 |     Variable *Src0Var = legalizeToReg(Src0); | 
 |     _vdup(T, Src0Var, Instr->getIndexValue(0)); | 
 |     _mov(Dest, T); | 
 |     return; | 
 |   } | 
 |  | 
 |   switch (DestTy) { | 
 |   case IceType_v8i1: | 
 |   case IceType_v8i16: { | 
 |     static constexpr SizeT ExpectedNumElements = 8; | 
 |     assert(ExpectedNumElements == Instr->getNumIndexes()); | 
 |     (void)ExpectedNumElements; | 
 |  | 
 |     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _vzip(T, Src0R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vzip(T, Src0R, Src1R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _vqmovn2(T, Src0R, Src0R, false, false); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |   } break; | 
 |   case IceType_v16i1: | 
 |   case IceType_v16i8: { | 
 |     static constexpr SizeT ExpectedNumElements = 16; | 
 |     assert(ExpectedNumElements == Instr->getNumIndexes()); | 
 |     (void)ExpectedNumElements; | 
 |  | 
 |     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _vzip(T, Src0R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, | 
 |                           23)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vzip(T, Src0R, Src1R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |   } break; | 
 |   case IceType_v4i1: | 
 |   case IceType_v4i32: | 
 |   case IceType_v4f32: { | 
 |     static constexpr SizeT ExpectedNumElements = 4; | 
 |     assert(ExpectedNumElements == Instr->getNumIndexes()); | 
 |     (void)ExpectedNumElements; | 
 |  | 
 |     if (Instr->indexesAre(0, 0, 1, 1)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _vzip(T, Src0R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(0, 4, 1, 5)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vzip(T, Src0R, Src1R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(0, 1, 4, 5)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vmovlh(T, Src0R, Src1R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(2, 3, 2, 3)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       _vmovhl(T, Src0R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |  | 
 |     if (Instr->indexesAre(2, 3, 6, 7)) { | 
 |       Variable *Src0R = legalizeToReg(Src0); | 
 |       Variable *Src1R = legalizeToReg(Src1); | 
 |       _vmovhl(T, Src1R, Src0R); | 
 |       _mov(Dest, T); | 
 |       return; | 
 |     } | 
 |   } break; | 
 |   default: | 
 |     break; | 
 |     // TODO(jpp): figure out how to properly lower this without scalarization. | 
 |   } | 
 |  | 
 |   // Unoptimized shuffle. Perform a series of inserts and extracts. | 
 |   Context.insert<InstFakeDef>(T); | 
 |   for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 
 |     auto *Index = Instr->getIndex(I); | 
 |     const SizeT Elem = Index->getValue(); | 
 |     auto *ExtElmt = makeReg(ElementType); | 
 |     if (Elem < NumElements) { | 
 |       lowerExtractElement( | 
 |           InstExtractElement::create(Func, ExtElmt, Src0, Index)); | 
 |     } else { | 
 |       lowerExtractElement(InstExtractElement::create( | 
 |           Func, ExtElmt, Src1, | 
 |           Ctx->getConstantInt32(Index->getValue() - NumElements))); | 
 |     } | 
 |     auto *NewT = makeReg(DestTy); | 
 |     lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt, | 
 |                                                  Ctx->getConstantInt32(I))); | 
 |     T = NewT; | 
 |   } | 
 |   _mov(Dest, T); | 
 | } | 
 |  | 
 | void TargetARM32::lowerSelect(const InstSelect *Instr) { | 
 |   Variable *Dest = Instr->getDest(); | 
 |   Type DestTy = Dest->getType(); | 
 |   Operand *SrcT = Instr->getTrueOperand(); | 
 |   Operand *SrcF = Instr->getFalseOperand(); | 
 |   Operand *Condition = Instr->getCondition(); | 
 |  | 
 |   if (!isVectorType(DestTy)) { | 
 |     lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT), | 
 |                        legalizeUndef(SrcF)); | 
 |     return; | 
 |   } | 
 |  | 
 |   Type TType = DestTy; | 
 |   switch (DestTy) { | 
 |   default: | 
 |     llvm::report_fatal_error("Unexpected type for vector select."); | 
 |   case IceType_v4i1: | 
 |     TType = IceType_v4i32; | 
 |     break; | 
 |   case IceType_v8i1: | 
 |     TType = IceType_v8i16; | 
 |     break; | 
 |   case IceType_v16i1: | 
 |     TType = IceType_v16i8; | 
 |     break; | 
 |   case IceType_v4f32: | 
 |     TType = IceType_v4i32; | 
 |     break; | 
 |   case IceType_v4i32: | 
 |   case IceType_v8i16: | 
 |   case IceType_v16i8: | 
 |     break; | 
 |   } | 
 |   auto *T = makeReg(TType); | 
 |   lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); | 
 |   auto *SrcTR = legalizeToReg(SrcT); | 
 |   auto *SrcFR = legalizeToReg(SrcF); | 
 |   _vbsl(T, SrcTR, SrcFR)->setDestRedefined(); | 
 |   _mov(Dest, T); | 
 | } | 
 |  | 
 | void TargetARM32::lowerStore(const InstStore *Instr) { | 
 |   Operand *Value = Instr->getData(); | 
 |   Operand *Addr = Instr->getAddr(); | 
 |   OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 
 |   Type Ty = NewAddr->getType(); | 
 |  | 
 |   if (Ty == IceType_i64) { | 
 |     Value = legalizeUndef(Value); | 
 |     Variable *ValueHi = legalizeToReg(hiOperand(Value)); | 
 |     Variable *ValueLo = legalizeToReg(loOperand(Value)); | 
 |     _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr))); | 
 |     _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr))); | 
 |   } else { | 
 |     Variable *ValueR = legalizeToReg(Value); | 
 |     _str(ValueR, NewAddr); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::doAddressOptStore() { | 
 |   Inst *Instr = iteratorToInst(Context.getCur()); | 
 |   assert(llvm::isa<InstStore>(Instr)); | 
 |   Operand *Src = Instr->getSrc(0); | 
 |   Operand *Addr = Instr->getSrc(1); | 
 |   if (OperandARM32Mem *Mem = | 
 |           formAddressingMode(Src->getType(), Func, Instr, Addr)) { | 
 |     Instr->setDeleted(); | 
 |     Context.insert<InstStore>(Src, Mem); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::lowerSwitch(const InstSwitch *Instr) { | 
 |   // This implements the most naive possible lowering. | 
  // cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
 |   Operand *Src0 = Instr->getComparison(); | 
 |   SizeT NumCases = Instr->getNumCases(); | 
 |   if (Src0->getType() == IceType_i64) { | 
 |     Src0 = legalizeUndef(Src0); | 
 |     Variable *Src0Lo = legalizeToReg(loOperand(Src0)); | 
 |     Variable *Src0Hi = legalizeToReg(hiOperand(Src0)); | 
 |     for (SizeT I = 0; I < NumCases; ++I) { | 
 |       Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I)); | 
 |       Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32); | 
 |       ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex); | 
 |       ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex); | 
 |       _cmp(Src0Lo, ValueLo); | 
 |       _cmp(Src0Hi, ValueHi, CondARM32::EQ); | 
 |       _br(Instr->getLabel(I), CondARM32::EQ); | 
 |     } | 
 |     _br(Instr->getLabelDefault()); | 
 |     return; | 
 |   } | 
 |  | 
 |   Variable *Src0Var = legalizeToReg(Src0); | 
 |   // If Src0 is not an i32, we left shift it -- see the icmp lowering for the | 
 |   // reason. | 
 |   assert(Src0Var->mustHaveReg()); | 
 |   const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 
 |   assert(ShiftAmt < 32); | 
 |   if (ShiftAmt > 0) { | 
 |     Operand *ShAmtImm = shAmtImm(ShiftAmt); | 
 |     Variable *T = makeReg(IceType_i32); | 
 |     _lsl(T, Src0Var, ShAmtImm); | 
 |     Src0Var = T; | 
 |   } | 
 |  | 
 |   for (SizeT I = 0; I < NumCases; ++I) { | 
 |     Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt); | 
 |     Value = legalize(Value, Legal_Reg | Legal_Flex); | 
 |     _cmp(Src0Var, Value); | 
 |     _br(Instr->getLabel(I), CondARM32::EQ); | 
 |   } | 
 |   _br(Instr->getLabelDefault()); | 
 | } | 
 |  | 
 | void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) { | 
 |   UnimplementedLoweringError(this, Instr); | 
 | } | 
 |  | 
 | void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) { | 
 |   _trap(); | 
 | } | 
 |  | 
 | namespace { | 
// Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
// and fp constants that cannot be encoded as immediates need access to the
// GOT address.
 | bool operandNeedsGot(const Operand *Opnd) { | 
 |   if (llvm::isa<ConstantRelocatable>(Opnd)) { | 
 |     return true; | 
 |   } | 
 |  | 
 |   if (llvm::isa<ConstantFloat>(Opnd)) { | 
 |     uint32_t _; | 
 |     return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_); | 
 |   } | 
 |  | 
 |   const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd); | 
 |   if (F64 != nullptr) { | 
 |     uint32_t _; | 
 |     return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) && | 
 |            !isFloatingPointZero(F64); | 
 |   } | 
 |  | 
 |   return false; | 
 | } | 
 |  | 
 | // Returns whether Phi needs the GOT address (which it does if any of its | 
 | // operands needs the GOT address.) | 
 | bool phiNeedsGot(const InstPhi *Phi) { | 
 |   if (Phi->isDeleted()) { | 
 |     return false; | 
 |   } | 
 |  | 
 |   for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { | 
 |     if (operandNeedsGot(Phi->getSrc(I))) { | 
 |       return true; | 
 |     } | 
 |   } | 
 |  | 
 |   return false; | 
 | } | 
 |  | 
 | // Returns whether **any** phi in Node needs the GOT address. | 
 | bool anyPhiInNodeNeedsGot(CfgNode *Node) { | 
 |   for (auto &Inst : Node->getPhis()) { | 
 |     if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) { | 
 |       return true; | 
 |     } | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::prelowerPhis() { | 
 |   CfgNode *Node = Context.getNode(); | 
 |  | 
 |   if (SandboxingType == ST_Nonsfi) { | 
 |     assert(GotPtr != nullptr); | 
 |     if (anyPhiInNodeNeedsGot(Node)) { | 
 |       // If any phi instruction needs the GOT address, we place a | 
 |       //   fake-use GotPtr | 
 |       // in Node to prevent the GotPtr's initialization from being dead code | 
 |       // eliminated. | 
 |       Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr)); | 
 |     } | 
 |   } | 
 |  | 
 |   PhiLowering::prelowerPhis32Bit(this, Node, Func); | 
 | } | 
 |  | 
 | Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) { | 
 |   Variable *Reg = makeReg(Ty, RegNum); | 
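  // The fake-def keeps liveness analysis happy: the veor below reads Reg
  // before any real definition of it exists.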
 |   Context.insert<InstFakeDef>(Reg); | 
 |   assert(isVectorType(Ty)); | 
 |   _veor(Reg, Reg, Reg); | 
 |   return Reg; | 
 | } | 
 |  | 
 | // Helper for legalize() to emit the right code to lower an operand to a | 
 | // register of the appropriate type. | 
 | Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) { | 
 |   Type Ty = Src->getType(); | 
 |   Variable *Reg = makeReg(Ty, RegNum); | 
 |   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { | 
 |     _ldr(Reg, Mem); | 
 |   } else { | 
 |     _mov(Reg, Src); | 
 |   } | 
 |   return Reg; | 
 | } | 
 |  | 
 | // TODO(jpp): remove unneeded else clauses in legalize. | 
 | Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, | 
 |                                RegNumT RegNum) { | 
 |   Type Ty = From->getType(); | 
 |   // Assert that a physical register is allowed. To date, all calls to | 
 |   // legalize() allow a physical register. Legal_Flex converts registers to the | 
 |   // right type OperandARM32FlexReg as needed. | 
 |   assert(Allowed & Legal_Reg); | 
 |  | 
  // Copied verbatim from TargetX86Base<Machine>.
 |   if (RegNum.hasNoValue()) { | 
 |     if (Variable *Subst = getContext().availabilityGet(From)) { | 
 |       // At this point we know there is a potential substitution available. | 
 |       if (!Subst->isRematerializable() && Subst->mustHaveReg() && | 
 |           !Subst->hasReg()) { | 
 |         // At this point we know the substitution will have a register. | 
 |         if (From->getType() == Subst->getType()) { | 
 |           // At this point we know the substitution's register is compatible. | 
 |           return Subst; | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   // Go through the various types of operands: OperandARM32Mem, | 
 |   // OperandARM32Flex, Constant, and Variable. Given the above assertion, if | 
 |   // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we | 
 |   // can always copy to a register. | 
 |   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { | 
 |     // Before doing anything with a Mem operand, we need to ensure that the | 
 |     // Base and Index components are in physical registers. | 
 |     Variable *Base = Mem->getBase(); | 
 |     Variable *Index = Mem->getIndex(); | 
 |     ConstantInteger32 *Offset = Mem->getOffset(); | 
 |     assert(Index == nullptr || Offset == nullptr); | 
 |     Variable *RegBase = nullptr; | 
 |     Variable *RegIndex = nullptr; | 
 |     assert(Base); | 
 |     RegBase = llvm::cast<Variable>( | 
 |         legalize(Base, Legal_Reg | Legal_Rematerializable)); | 
 |     assert(Ty < MemTraitsSize); | 
 |     if (Index) { | 
 |       assert(Offset == nullptr); | 
 |       assert(MemTraits[Ty].CanHaveIndex); | 
 |       RegIndex = legalizeToReg(Index); | 
 |     } | 
 |     if (Offset && Offset->getValue() != 0) { | 
 |       assert(Index == nullptr); | 
 |       static constexpr bool ZeroExt = false; | 
 |       assert(MemTraits[Ty].CanHaveImm); | 
 |       if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) { | 
 |         llvm::report_fatal_error("Invalid memory offset."); | 
 |       } | 
 |     } | 
 |  | 
 |     // Create a new operand if there was a change. | 
 |     if (Base != RegBase || Index != RegIndex) { | 
 |       // There is only a reg +/- reg or reg + imm form. | 
 |       // Figure out which to re-create. | 
 |       if (RegIndex) { | 
 |         Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex, | 
 |                                       Mem->getShiftOp(), Mem->getShiftAmt(), | 
 |                                       Mem->getAddrMode()); | 
 |       } else { | 
 |         Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset, | 
 |                                       Mem->getAddrMode()); | 
 |       } | 
 |     } | 
 |     if (Allowed & Legal_Mem) { | 
 |       From = Mem; | 
 |     } else { | 
 |       Variable *Reg = makeReg(Ty, RegNum); | 
 |       _ldr(Reg, Mem); | 
 |       From = Reg; | 
 |     } | 
 |     return From; | 
 |   } | 
 |  | 
 |   if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { | 
 |     if (!(Allowed & Legal_Flex)) { | 
 |       if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { | 
 |         if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { | 
 |           From = FlexReg->getReg(); | 
 |           // Fall through and let From be checked as a Variable below, where it | 
 |           // may or may not need a register. | 
 |         } else { | 
 |           return copyToReg(Flex, RegNum); | 
 |         } | 
 |       } else { | 
 |         return copyToReg(Flex, RegNum); | 
 |       } | 
 |     } else { | 
 |       return From; | 
 |     } | 
 |   } | 
 |  | 
 |   if (llvm::isa<Constant>(From)) { | 
 |     if (llvm::isa<ConstantUndef>(From)) { | 
 |       From = legalizeUndef(From, RegNum); | 
 |       if (isVectorType(Ty)) | 
 |         return From; | 
 |     } | 
 |     // There should be no constants of vector type (other than undef). | 
 |     assert(!isVectorType(Ty)); | 
 |     if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { | 
 |       uint32_t RotateAmt; | 
 |       uint32_t Immed_8; | 
 |       uint32_t Value = static_cast<uint32_t>(C32->getValue()); | 
 |       if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { | 
 |         // The immediate can be encoded as a Flex immediate. We may return the | 
 |         // Flex operand if the caller has Allow'ed it. | 
 |         auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 
 |         const bool CanBeFlex = Allowed & Legal_Flex; | 
 |         if (CanBeFlex) | 
 |           return OpF; | 
 |         return copyToReg(OpF, RegNum); | 
 |       } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt, | 
 |                                                  &Immed_8)) { | 
 |         // Even though the immediate can't be encoded as a Flex operand, its | 
 |         // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit | 
 |         // constant with a single instruction. | 
 |         auto *InvOpF = | 
 |             OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 
 |         Variable *Reg = makeReg(Ty, RegNum); | 
 |         _mvn(Reg, InvOpF); | 
 |         return Reg; | 
 |       } else { | 
 |         // Do a movw/movt to a register. | 
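        // (Illustrative: 0x12345678 becomes "movw Reg, #0x5678" followed by
        // "movt Reg, #0x1234".)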
 |         Variable *Reg = makeReg(Ty, RegNum); | 
 |         uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 
 |         _movw(Reg, | 
 |               UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 
 |         if (UpperBits != 0) { | 
 |           _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 
 |         } | 
 |         return Reg; | 
 |       } | 
 |     } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 
 |       Variable *Reg = makeReg(Ty, RegNum); | 
 |       if (SandboxingType != ST_Nonsfi) { | 
 |         _movw(Reg, C); | 
 |         _movt(Reg, C); | 
 |       } else { | 
 |         auto *GotAddr = legalizeToReg(GotPtr); | 
 |         GlobalString CGotoffName = createGotoffRelocation(C); | 
 |         loadNamedConstantRelocatablePIC( | 
 |             CGotoffName, Reg, [this, Reg](Variable *PC) { | 
 |               _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg)); | 
 |             }); | 
 |         _add(Reg, GotAddr, Reg); | 
 |       } | 
 |       return Reg; | 
 |     } else { | 
 |       assert(isScalarFloatingType(Ty)); | 
 |       uint32_t ModifiedImm; | 
 |       if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | 
 |         Variable *T = makeReg(Ty, RegNum); | 
 |         _mov(T, | 
 |              OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | 
 |         return T; | 
 |       } | 
 |  | 
 |       if (Ty == IceType_f64 && isFloatingPointZero(From)) { | 
 |         // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 | 
 |         // because ARM does not have a veor instruction with S registers. | 
 |         Variable *T = makeReg(IceType_f64, RegNum); | 
 |         Context.insert<InstFakeDef>(T); | 
 |         _veor(T, T, T); | 
 |         return T; | 
 |       } | 
 |  | 
 |       // Load floats/doubles from literal pool. | 
 |       auto *CFrom = llvm::cast<Constant>(From); | 
 |       assert(CFrom->getShouldBePooled()); | 
 |       Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName()); | 
 |       Variable *BaseReg = nullptr; | 
 |       if (SandboxingType == ST_Nonsfi) { | 
 |         // vldr does not support the [base, index] addressing mode, so we need | 
 |         // to legalize Offset to a register. Otherwise, we could simply | 
 |         //   vldr dest, [got, reg(Offset)] | 
 |         BaseReg = legalizeToReg(Offset); | 
 |       } else { | 
 |         BaseReg = makeReg(getPointerType()); | 
 |         _movw(BaseReg, Offset); | 
 |         _movt(BaseReg, Offset); | 
 |       } | 
 |       From = formMemoryOperand(BaseReg, Ty); | 
 |       return copyToReg(From, RegNum); | 
 |     } | 
 |   } | 
 |  | 
 |   if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 
 |     if (Var->isRematerializable()) { | 
 |       if (Allowed & Legal_Rematerializable) { | 
 |         return From; | 
 |       } | 
 |  | 
 |       Variable *T = makeReg(Var->getType(), RegNum); | 
 |       _mov(T, Var); | 
 |       return T; | 
 |     } | 
 |     // Check if the variable is guaranteed a physical register. This can happen | 
 |     // either when the variable is pre-colored or when it is assigned infinite | 
 |     // weight. | 
 |     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 
 |     // We need a new physical register for the operand if: | 
 |     //   Mem is not allowed and Var isn't guaranteed a physical | 
 |     //   register, or | 
 |     //   RegNum is required and Var->getRegNum() doesn't match. | 
 |     if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 
 |         (RegNum.hasValue() && (RegNum != Var->getRegNum()))) { | 
 |       From = copyToReg(From, RegNum); | 
 |     } | 
 |     return From; | 
 |   } | 
 |   llvm::report_fatal_error("Unhandled operand kind in legalize()"); | 
 |  | 
 |   return From; | 
 | } | 
 |  | 
 | /// Provide a trivial wrapper to legalize() for this common usage. | 
 | Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) { | 
 |   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 
 | } | 
 |  | 
 | /// Legalize undef values to concrete values. | 
 | Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) { | 
 |   Type Ty = From->getType(); | 
 |   if (llvm::isa<ConstantUndef>(From)) { | 
 |     // Lower undefs to zero. Another option is to lower undefs to an | 
 |     // uninitialized register; however, using an uninitialized register results | 
 |     // in less predictable code. | 
 |     // | 
 |     // If in the future the implementation is changed to lower undef values to | 
 |     // uninitialized registers, a FakeDef will be needed: | 
 |     // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to | 
 |     // ensure that the live range of Reg is not overestimated. If the constant | 
 |     // being lowered is a 64 bit value, then the result should be split and the | 
 |     // lo and hi components will need to go in uninitialized registers. | 
 |     if (isVectorType(Ty)) | 
 |       return makeVectorOfZeros(Ty, RegNum); | 
 |     return Ctx->getConstantZero(Ty); | 
 |   } | 
 |   return From; | 
 | } | 
 |  | 
 | OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { | 
 |   auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); | 
  // Address mode optimization may already have produced an OperandARM32Mem,
  // in which case no further transformation is needed.
 |   if (Mem) { | 
 |     return llvm::cast<OperandARM32Mem>(legalize(Mem)); | 
 |   } | 
 |   // If we didn't do address mode optimization, then we only have a | 
 |   // base/offset to work with. ARM always requires a base register, so | 
 |   // just use that to hold the operand. | 
 |   auto *Base = llvm::cast<Variable>( | 
 |       legalize(Operand, Legal_Reg | Legal_Rematerializable)); | 
 |   return OperandARM32Mem::create( | 
 |       Func, Ty, Base, | 
 |       llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 
 | } | 
 |  | 
 | Variable64On32 *TargetARM32::makeI64RegPair() { | 
 |   Variable64On32 *Reg = | 
 |       llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 
 |   Reg->setMustHaveReg(); | 
 |   Reg->initHiLo(Func); | 
 |   Reg->getLo()->setMustNotHaveReg(); | 
 |   Reg->getHi()->setMustNotHaveReg(); | 
 |   return Reg; | 
 | } | 
 |  | 
 | Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) { | 
 |   // There aren't any 64-bit integer registers for ARM32. | 
 |   assert(Type != IceType_i64); | 
 |   assert(AllowTemporaryWithNoReg || RegNum.hasValue()); | 
 |   Variable *Reg = Func->makeVariable(Type); | 
 |   if (RegNum.hasValue()) | 
 |     Reg->setRegNum(RegNum); | 
 |   else | 
 |     Reg->setMustHaveReg(); | 
 |   return Reg; | 
 | } | 
 |  | 
 | void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align, | 
 |                                     RegNumT TmpRegNum) { | 
 |   assert(llvm::isPowerOf2_32(Align)); | 
 |   uint32_t RotateAmt; | 
 |   uint32_t Immed_8; | 
 |   Operand *Mask; | 
 |   // Use AND or BIC to mask off the bits, depending on which immediate fits (if | 
 |   // it fits at all). Assume Align is usually small, in which case BIC works | 
 |   // better. Thus, this rounds down to the alignment. | 
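  // For example (illustrative), Align=16 rounds down via "bic Reg, Reg, #15";
  // when Align-1 is not encodable as a flex immediate, the mask -Align is
  // used with AND instead.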
 |   if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { | 
 |     Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex, | 
 |                     TmpRegNum); | 
 |     _bic(Reg, Reg, Mask); | 
 |   } else { | 
 |     Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex, | 
 |                     TmpRegNum); | 
 |     _and(Reg, Reg, Mask); | 
 |   } | 
 | } | 
 |  | 
 | void TargetARM32::postLower() { | 
 |   if (Func->getOptLevel() == Opt_m1) | 
 |     return; | 
 |   markRedefinitions(); | 
 |   Context.availabilityUpdate(); | 
 | } | 
 |  | 
 | void TargetARM32::makeRandomRegisterPermutation( | 
 |     llvm::SmallVectorImpl<RegNumT> &Permutation, | 
 |     const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { | 
 |   (void)Permutation; | 
 |   (void)ExcludeRegisters; | 
 |   (void)Salt; | 
 |   UnimplementedError(getFlags()); | 
 | } | 
 |  | 
 | void TargetARM32::emit(const ConstantInteger32 *C) const { | 
 |   if (!BuildDefs::dump()) | 
 |     return; | 
 |   Ostream &Str = Ctx->getStrEmit(); | 
 |   Str << "#" << C->getValue(); | 
 | } | 
 |  | 
 | void TargetARM32::emit(const ConstantInteger64 *) const { | 
 |   llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 
 | } | 
 |  | 
 | void TargetARM32::emit(const ConstantFloat *C) const { | 
 |   (void)C; | 
 |   UnimplementedError(getFlags()); | 
 | } | 
 |  | 
 | void TargetARM32::emit(const ConstantDouble *C) const { | 
 |   (void)C; | 
 |   UnimplementedError(getFlags()); | 
 | } | 
 |  | 
 | void TargetARM32::emit(const ConstantUndef *) const { | 
 |   llvm::report_fatal_error("undef value encountered by emitter."); | 
 | } | 
 |  | 
 | void TargetARM32::emit(const ConstantRelocatable *C) const { | 
 |   if (!BuildDefs::dump()) | 
 |     return; | 
 |   Ostream &Str = Ctx->getStrEmit(); | 
 |   Str << "#"; | 
 |   emitWithoutPrefix(C); | 
 | } | 
 |  | 
 | void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean, | 
 |                                      Operand *TrueValue, Operand *FalseValue) { | 
 |   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 
 |  | 
 |   assert(Boolean->getType() == IceType_i1); | 
 |  | 
 |   bool NeedsAnd1 = false; | 
 |   if (TrueValue->getType() == IceType_i1) { | 
 |     assert(FalseValue->getType() == IceType_i1); | 
 |  | 
 |     Variable *TrueValueV = Func->makeVariable(IceType_i1); | 
 |     SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue); | 
 |     TrueValue = TrueValueV; | 
 |  | 
 |     Variable *FalseValueV = Func->makeVariable(IceType_i1); | 
 |     SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue); | 
 |     FalseValue = FalseValueV; | 
 |  | 
 |     NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No; | 
 |   } | 
 |  | 
 |   Variable *DestLo = (Dest->getType() == IceType_i64) | 
 |                          ? llvm::cast<Variable>(loOperand(Dest)) | 
 |                          : Dest; | 
 |   Variable *DestHi = (Dest->getType() == IceType_i64) | 
 |                          ? llvm::cast<Variable>(hiOperand(Dest)) | 
 |                          : nullptr; | 
 |   Operand *FalseValueLo = (FalseValue->getType() == IceType_i64) | 
 |                               ? loOperand(FalseValue) | 
 |                               : FalseValue; | 
 |   Operand *FalseValueHi = | 
 |       (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr; | 
 |  | 
 |   Operand *TrueValueLo = | 
 |       (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue; | 
 |   Operand *TrueValueHi = | 
 |       (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr; | 
 |  | 
 |   Variable *T_Lo = makeReg(DestLo->getType()); | 
 |   Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType()); | 
 |  | 
 |   _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); | 
 |   if (DestHi) { | 
 |     _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); | 
 |   } | 
 |  | 
 |   CondWhenTrue Cond(CondARM32::kNone); | 
  // FlagsWereSet is used to determine whether Boolean was folded or not. If
  // not, an explicit _tst instruction is added below.
 |   bool FlagsWereSet = false; | 
 |   if (const Inst *Producer = Computations.getProducerOf(Boolean)) { | 
 |     switch (Producer->getKind()) { | 
 |     default: | 
 |       llvm::report_fatal_error("Unexpected producer."); | 
 |     case Inst::Icmp: { | 
 |       Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); | 
 |       FlagsWereSet = true; | 
 |     } break; | 
 |     case Inst::Fcmp: { | 
 |       Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); | 
 |       FlagsWereSet = true; | 
 |     } break; | 
 |     case Inst::Cast: { | 
 |       const auto *CastProducer = llvm::cast<InstCast>(Producer); | 
 |       assert(CastProducer->getCastKind() == InstCast::Trunc); | 
 |       Boolean = CastProducer->getSrc(0); | 
 |       // No flags were set, so a _tst(Src, 1) will be emitted below. Don't | 
 |       // bother legalizing Src to a Reg because it will be legalized before | 
 |       // emitting the tst instruction. | 
 |       FlagsWereSet = false; | 
 |     } break; | 
 |     case Inst::Arithmetic: { | 
 |       // This is a special case: we eagerly assumed Producer could be folded, | 
 |       // but in reality, it can't. No reason to panic: we just lower it using | 
 |       // the regular lowerArithmetic helper. | 
 |       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer); | 
 |       lowerArithmetic(ArithProducer); | 
 |       Boolean = ArithProducer->getDest(); | 
      // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
      // bother legalizing Dest to a Reg because it will be legalized before
      // emitting the tst instruction.
 |       FlagsWereSet = false; | 
 |     } break; | 
 |     } | 
 |   } | 
 |  | 
 |   if (!FlagsWereSet) { | 
 |     // No flags have been set, so emit a tst Boolean, 1. | 
 |     Variable *Src = legalizeToReg(Boolean); | 
 |     _tst(Src, _1); | 
 |     Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero. | 
 |   } | 
 |  | 
 |   if (Cond.WhenTrue0 == CondARM32::kNone) { | 
 |     assert(Cond.WhenTrue1 == CondARM32::kNone); | 
 |   } else { | 
 |     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex), | 
 |                    Cond.WhenTrue0); | 
 |     if (DestHi) { | 
 |       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex), | 
 |                      Cond.WhenTrue0); | 
 |     } | 
 |   } | 
 |  | 
 |   if (Cond.WhenTrue1 != CondARM32::kNone) { | 
 |     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex), | 
 |                    Cond.WhenTrue1); | 
 |     if (DestHi) { | 
 |       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex), | 
 |                      Cond.WhenTrue1); | 
 |     } | 
 |   } | 
 |  | 
 |   if (NeedsAnd1) { | 
 |     // We lowered something that is unsafe (i.e., can't provably be zero or | 
 |     // one). Truncate the result. | 
 |     _and(T_Lo, T_Lo, _1); | 
 |   } | 
 |  | 
 |   _mov(DestLo, T_Lo); | 
 |   if (DestHi) { | 
 |     _mov(DestHi, T_Hi); | 
 |   } | 
 | } | 
 |  | 
 | TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, | 
 |                                                   Operand *Boolean) { | 
 |   assert(Boolean->getType() == IceType_i1); | 
 |   Variable *T = makeReg(IceType_i1); | 
 |   Operand *_0 = | 
 |       legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); | 
 |   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 
 |  | 
 |   SafeBoolChain Safe = SBC_Yes; | 
 |   if (const Inst *Producer = Computations.getProducerOf(Boolean)) { | 
 |     switch (Producer->getKind()) { | 
 |     default: | 
 |       llvm::report_fatal_error("Unexpected producer."); | 
 |     case Inst::Icmp: { | 
 |       _mov(T, _0); | 
 |       CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); | 
 |       assert(Cond.WhenTrue0 != CondARM32::AL); | 
 |       assert(Cond.WhenTrue0 != CondARM32::kNone); | 
 |       assert(Cond.WhenTrue1 == CondARM32::kNone); | 
 |       _mov_redefined(T, _1, Cond.WhenTrue0); | 
 |     } break; | 
 |     case Inst::Fcmp: { | 
 |       _mov(T, _0); | 
 |       Inst *MovZero = Context.getLastInserted(); | 
 |       CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); | 
 |       if (Cond.WhenTrue0 == CondARM32::AL) { | 
 |         assert(Cond.WhenTrue1 == CondARM32::kNone); | 
 |         MovZero->setDeleted(); | 
 |         _mov(T, _1); | 
 |       } else if (Cond.WhenTrue0 != CondARM32::kNone) { | 
 |         _mov_redefined(T, _1, Cond.WhenTrue0); | 
 |       } | 
 |       if (Cond.WhenTrue1 != CondARM32::kNone) { | 
 |         assert(Cond.WhenTrue0 != CondARM32::kNone); | 
 |         assert(Cond.WhenTrue0 != CondARM32::AL); | 
 |         _mov_redefined(T, _1, Cond.WhenTrue1); | 
 |       } | 
 |     } break; | 
 |     case Inst::Cast: { | 
 |       const auto *CastProducer = llvm::cast<InstCast>(Producer); | 
 |       assert(CastProducer->getCastKind() == InstCast::Trunc); | 
 |       Operand *Src = CastProducer->getSrc(0); | 
 |       if (Src->getType() == IceType_i64) | 
 |         Src = loOperand(Src); | 
 |       _mov(T, legalize(Src, Legal_Reg | Legal_Flex)); | 
 |       Safe = SBC_No; | 
 |     } break; | 
 |     case Inst::Arithmetic: { | 
 |       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer); | 
 |       Safe = lowerInt1Arithmetic(ArithProducer); | 
 |       _mov(T, ArithProducer->getDest()); | 
 |     } break; | 
 |     } | 
 |   } else { | 
 |     _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex)); | 
 |   } | 
 |  | 
 |   _mov(Dest, T); | 
 |   return Safe; | 
 | } | 
 |  | 
 | namespace { | 
 | namespace BoolFolding { | 
 | bool shouldTrackProducer(const Inst &Instr) { | 
 |   switch (Instr.getKind()) { | 
 |   default: | 
 |     return false; | 
 |   case Inst::Icmp: | 
 |   case Inst::Fcmp: | 
 |     return true; | 
 |   case Inst::Cast: { | 
 |     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) { | 
 |     default: | 
 |       return false; | 
 |     case InstCast::Trunc: | 
 |       return true; | 
 |     } | 
 |   } | 
 |   case Inst::Arithmetic: { | 
 |     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::And: | 
 |     case InstArithmetic::Or: | 
 |       return true; | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | bool isValidConsumer(const Inst &Instr) { | 
 |   switch (Instr.getKind()) { | 
 |   default: | 
 |     return false; | 
 |   case Inst::Br: | 
 |     return true; | 
 |   case Inst::Select: | 
 |     return !isVectorType(Instr.getDest()->getType()); | 
 |   case Inst::Cast: { | 
 |     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) { | 
 |     default: | 
 |       return false; | 
 |     case InstCast::Sext: | 
 |       return !isVectorType(Instr.getDest()->getType()); | 
 |     case InstCast::Zext: | 
 |       return !isVectorType(Instr.getDest()->getType()); | 
 |     } | 
 |   } | 
 |   case Inst::Arithmetic: { | 
 |     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::And: | 
 |       return !isVectorType(Instr.getDest()->getType()); | 
 |     case InstArithmetic::Or: | 
 |       return !isVectorType(Instr.getDest()->getType()); | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
 | } // end of namespace BoolFolding | 
 |  | 
 | namespace FpFolding { | 
 | bool shouldTrackProducer(const Inst &Instr) { | 
 |   switch (Instr.getKind()) { | 
 |   default: | 
 |     return false; | 
 |   case Inst::Arithmetic: { | 
 |     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::Fmul: | 
 |       return true; | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | bool isValidConsumer(const Inst &Instr) { | 
 |   switch (Instr.getKind()) { | 
 |   default: | 
 |     return false; | 
 |   case Inst::Arithmetic: { | 
 |     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::Fadd: | 
 |     case InstArithmetic::Fsub: | 
 |       return true; | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
 | } // end of namespace FpFolding | 
 |  | 
 | namespace IntFolding { | 
 | bool shouldTrackProducer(const Inst &Instr) { | 
 |   switch (Instr.getKind()) { | 
 |   default: | 
 |     return false; | 
 |   case Inst::Arithmetic: { | 
 |     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::Mul: | 
 |       return true; | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | bool isValidConsumer(const Inst &Instr) { | 
 |   switch (Instr.getKind()) { | 
 |   default: | 
 |     return false; | 
 |   case Inst::Arithmetic: { | 
 |     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | 
 |     default: | 
 |       return false; | 
 |     case InstArithmetic::Add: | 
 |     case InstArithmetic::Sub: | 
 |       return true; | 
 |     } | 
 |   } | 
 |   } | 
 | } | 
} // end of namespace IntFolding
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) { | 
 |   for (Inst &Instr : Node->getInsts()) { | 
 |     // Check whether Instr is a valid producer. | 
 |     Variable *Dest = Instr.getDest(); | 
 |     if (!Instr.isDeleted() // only consider non-deleted instructions; and | 
 |         && Dest            // only instructions with an actual dest var; and | 
 |         && Dest->getType() == IceType_i1 // only bool-type dest vars; and | 
 |         && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. | 
 |       KnownComputations.emplace(Dest->getIndex(), | 
 |                                 ComputationEntry(&Instr, IceType_i1)); | 
 |     } | 
 |     if (!Instr.isDeleted() // only consider non-deleted instructions; and | 
 |         && Dest            // only instructions with an actual dest var; and | 
 |         && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and | 
 |         && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr. | 
 |       KnownComputations.emplace(Dest->getIndex(), | 
 |                                 ComputationEntry(&Instr, Dest->getType())); | 
 |     } | 
 |     if (!Instr.isDeleted() // only consider non-deleted instructions; and | 
 |         && Dest            // only instructions with an actual dest var; and | 
 |         && Dest->getType() == IceType_i32            // i32 only dest vars; and | 
 |         && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr. | 
 |       KnownComputations.emplace(Dest->getIndex(), | 
 |                                 ComputationEntry(&Instr, IceType_i32)); | 
 |     } | 
 |     // Check each src variable against the map. | 
 |     FOREACH_VAR_IN_INST(Var, Instr) { | 
 |       SizeT VarNum = Var->getIndex(); | 
 |       auto ComputationIter = KnownComputations.find(VarNum); | 
 |       if (ComputationIter == KnownComputations.end()) { | 
 |         continue; | 
 |       } | 
 |  | 
 |       ++ComputationIter->second.NumUses; | 
 |       switch (ComputationIter->second.ComputationType) { | 
 |       default: | 
 |         KnownComputations.erase(VarNum); | 
 |         continue; | 
 |       case IceType_i1: | 
 |         if (!BoolFolding::isValidConsumer(Instr)) { | 
 |           KnownComputations.erase(VarNum); | 
 |           continue; | 
 |         } | 
 |         break; | 
 |       case IceType_i32: | 
 |         if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) { | 
 |           KnownComputations.erase(VarNum); | 
 |           continue; | 
 |         } | 
 |         break; | 
 |       case IceType_f32: | 
 |       case IceType_f64: | 
 |         if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) { | 
 |           KnownComputations.erase(VarNum); | 
 |           continue; | 
 |         } | 
 |         break; | 
 |       } | 
 |  | 
 |       if (Instr.isLastUse(Var)) { | 
 |         ComputationIter->second.IsLiveOut = false; | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); | 
 |        Iter != End;) { | 
 |     // Disable the folding if its dest may be live beyond this block. | 
 |     if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) { | 
 |       Iter = KnownComputations.erase(Iter); | 
 |       continue; | 
 |     } | 
 |  | 
 |     // Mark as "dead" rather than outright deleting. This is so that other | 
 |     // peephole style optimizations during or before lowering have access to | 
 |     // this instruction in undeleted form. See for example | 
 |     // tryOptimizedCmpxchgCmpBr(). | 
 |     Iter->second.Instr->setDead(); | 
 |     ++Iter; | 
 |   } | 
 | } | 
 |  | 
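// A Sandboxer wraps the emission of one (potentially) sandboxed instruction
// sequence. When sandboxing is enabled, createAutoBundle() opens a
// bundle-locked region so that the masking BIC and the instruction it guards
// can never be split across a bundle boundary.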
 | TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target, | 
 |                                   InstBundleLock::Option BundleOption) | 
 |     : Target(Target), BundleOption(BundleOption) {} | 
 |  | 
 | TargetARM32::Sandboxer::~Sandboxer() {} | 
 |  | 
 | namespace { | 
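// BIC masks used for sandboxing, expressed as ARM flexible immediates (Imm8
// rotated right by 2 * RotateAmt bits). The indirect-branch mask
// (0xFC ror 4 == 0xC000000F) clears a branch target's low four bits (forcing
// 16-byte bundle alignment) and its top two bits. The memory-op mask
// (0x0C ror 4 == 0xC0000000) clears only the top two bits, confining data
// addresses to the low 1GB of the address space.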
 | OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) { | 
 |   constexpr uint32_t Imm8 = 0xFC; // 0xC000000F | 
 |   constexpr uint32_t RotateAmt = 2; | 
 |   return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt); | 
 | } | 
 |  | 
 | OperandARM32FlexImm *memOpBicMask(Cfg *Func) { | 
 |   constexpr uint32_t Imm8 = 0x0C; // 0xC0000000 | 
 |   constexpr uint32_t RotateAmt = 2; | 
 |   return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt); | 
 | } | 
 |  | 
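// r9 (reserved for TLS under sandboxing) and sp (re-masked every time it is
// written; see the *_sp helpers below) are known to hold valid sandbox
// addresses, so memory accesses based on them need no extra BIC.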
bool baseNeedsBic(Variable *Base) {
 |   return Base->getRegNum() != RegARM32::Reg_r9 && | 
 |          Base->getRegNum() != RegARM32::Reg_sp; | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetARM32::Sandboxer::createAutoBundle() { | 
 |   Bundler = makeUnique<AutoBundle>(Target, BundleOption); | 
 | } | 
 |  | 
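// Under sandboxing, every instruction that writes sp is followed, within the
// same bundle, by a BIC that clears the upper address bits, so sp never
// escapes the data sandbox.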
 | void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) { | 
 |   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp); | 
 |   if (!Target->NeedSandboxing) { | 
 |     Target->_add(SP, SP, AddAmount); | 
 |     return; | 
 |   } | 
 |   createAutoBundle(); | 
 |   Target->_add(SP, SP, AddAmount); | 
 |   Target->_bic(SP, SP, memOpBicMask(Target->Func)); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::align_sp(size_t Alignment) { | 
 |   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp); | 
 |   if (!Target->NeedSandboxing) { | 
 |     Target->alignRegisterPow2(SP, Alignment); | 
 |     return; | 
 |   } | 
 |   createAutoBundle(); | 
 |   Target->alignRegisterPow2(SP, Alignment); | 
 |   Target->_bic(SP, SP, memOpBicMask(Target->Func)); | 
 | } | 
 |  | 
 | InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg, | 
 |                                           Operand *CallTarget) { | 
 |   if (Target->NeedSandboxing) { | 
 |     createAutoBundle(); | 
 |     if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) { | 
 |       Target->_bic(CallTargetR, CallTargetR, | 
 |                    indirectBranchBicMask(Target->Func)); | 
 |     } | 
 |   } | 
 |   return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget); | 
 | } | 
 |  | 
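// Memory accesses whose base register is not known to be sandboxed (see
// baseNeedsBic()) BIC the base within the same bundle as the access; reg+reg
// addressing cannot be sandboxed this way, hence the asserts.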
 | void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem, | 
 |                                  CondARM32::Cond Pred) { | 
 |   Variable *MemBase = Mem->getBase(); | 
 |   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) { | 
 |     createAutoBundle(); | 
 |     assert(!Mem->isRegReg()); | 
 |     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred); | 
 |   } | 
 |   Target->_ldr(Dest, Mem, Pred); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem, | 
 |                                    CondARM32::Cond Pred) { | 
 |   Variable *MemBase = Mem->getBase(); | 
 |   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) { | 
 |     createAutoBundle(); | 
 |     assert(!Mem->isRegReg()); | 
 |     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred); | 
 |   } | 
 |   Target->_ldrex(Dest, Mem, Pred); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::reset_sp(Variable *Src) { | 
 |   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp); | 
 |   if (!Target->NeedSandboxing) { | 
 |     Target->_mov_redefined(SP, Src); | 
 |     return; | 
 |   } | 
 |   createAutoBundle(); | 
 |   Target->_mov_redefined(SP, Src); | 
 |   Target->_bic(SP, SP, memOpBicMask(Target->Func)); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) { | 
 |   if (Target->NeedSandboxing) { | 
 |     createAutoBundle(); | 
 |     Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func)); | 
 |   } | 
 |   Target->_ret(RetAddr, RetValue); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem, | 
 |                                  CondARM32::Cond Pred) { | 
 |   Variable *MemBase = Mem->getBase(); | 
 |   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) { | 
 |     createAutoBundle(); | 
 |     assert(!Mem->isRegReg()); | 
 |     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred); | 
 |   } | 
 |   Target->_str(Src, Mem, Pred); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src, | 
 |                                    OperandARM32Mem *Mem, CondARM32::Cond Pred) { | 
 |   Variable *MemBase = Mem->getBase(); | 
 |   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) { | 
 |     createAutoBundle(); | 
 |     assert(!Mem->isRegReg()); | 
 |     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred); | 
 |   } | 
 |   Target->_strex(Dest, Src, Mem, Pred); | 
 | } | 
 |  | 
 | void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) { | 
 |   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp); | 
 |   if (!Target->NeedSandboxing) { | 
 |     Target->_sub(SP, SP, SubAmount); | 
 |     return; | 
 |   } | 
 |   createAutoBundle(); | 
 |   Target->_sub(SP, SP, SubAmount); | 
 |   Target->_bic(SP, SP, memOpBicMask(Target->Func)); | 
 | } | 
 |  | 
 | TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) | 
 |     : TargetDataLowering(Ctx) {} | 
 |  | 
 | void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars, | 
 |                                    const std::string &SectionSuffix) { | 
 |   const bool IsPIC = getFlags().getUseNonsfi(); | 
 |   switch (getFlags().getOutFileType()) { | 
 |   case FT_Elf: { | 
 |     ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 
 |     Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix, | 
 |                              IsPIC); | 
 |   } break; | 
 |   case FT_Asm: | 
 |   case FT_Iasm: { | 
 |     OstreamLocker _(Ctx); | 
 |     for (const VariableDeclaration *Var : Vars) { | 
 |       if (getFlags().matchTranslateOnly(Var->getName(), 0)) { | 
 |         emitGlobal(*Var, SectionSuffix); | 
 |       } | 
 |     } | 
 |   } break; | 
 |   } | 
 | } | 
 |  | 
 | namespace { | 
 | template <typename T> struct ConstantPoolEmitterTraits; | 
 |  | 
 | static_assert(sizeof(uint64_t) == 8, | 
 |               "uint64_t is supposed to be 8 bytes wide."); | 
 |  | 
 | // TODO(jpp): implement the following when implementing constant randomization: | 
 | //  * template <> struct ConstantPoolEmitterTraits<uint8_t> | 
 | //  * template <> struct ConstantPoolEmitterTraits<uint16_t> | 
 | //  * template <> struct ConstantPoolEmitterTraits<uint32_t> | 
 | template <> struct ConstantPoolEmitterTraits<float> { | 
 |   using ConstantType = ConstantFloat; | 
 |   static constexpr Type IceType = IceType_f32; | 
 |   // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy | 
 |   // about them being constexpr. | 
 |   static const char AsmTag[]; | 
 |   static const char TypeName[]; | 
 |   static uint64_t bitcastToUint64(float Value) { | 
 |     static_assert(sizeof(Value) == sizeof(uint32_t), | 
 |                   "Float should be 4 bytes."); | 
 |     const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value); | 
 |     return static_cast<uint64_t>(IntValue); | 
 |   } | 
 | }; | 
 | const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long"; | 
 | const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32"; | 
 |  | 
 | template <> struct ConstantPoolEmitterTraits<double> { | 
 |   using ConstantType = ConstantDouble; | 
 |   static constexpr Type IceType = IceType_f64; | 
 |   static const char AsmTag[]; | 
 |   static const char TypeName[]; | 
 |   static uint64_t bitcastToUint64(double Value) { | 
 |     static_assert(sizeof(double) == sizeof(uint64_t), | 
 |                   "Double should be 8 bytes."); | 
 |     return Utils::bitCopy<uint64_t>(Value); | 
 |   } | 
 | }; | 
 | const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad"; | 
 | const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64"; | 
 |  | 
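// Emits a single pooled constant as
//   <label>:
//           <AsmTag> 0x<raw bits>   /* <TypeName> <value> */
// i.e., ".long" for f32 and ".quad" for f64.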
 | template <typename T> | 
 | void emitConstant( | 
 |     Ostream &Str, | 
 |     const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) { | 
 |   using Traits = ConstantPoolEmitterTraits<T>; | 
 |   Str << Const->getLabelName(); | 
 |   Str << ":\n\t" << Traits::AsmTag << "\t0x"; | 
 |   T Value = Const->getValue(); | 
 |   Str.write_hex(Traits::bitcastToUint64(Value)); | 
 |   Str << "\t/* " << Traits::TypeName << " " << Value << " */\n"; | 
 | } | 
 |  | 
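// Emits the entire pool for type T into a mergeable read-only section
// (".rodata.cst4" for f32, ".rodata.cst8" for f64), skipping constants that
// were not marked for pooling.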
 | template <typename T> void emitConstantPool(GlobalContext *Ctx) { | 
 |   if (!BuildDefs::dump()) { | 
 |     return; | 
 |   } | 
 |  | 
 |   using Traits = ConstantPoolEmitterTraits<T>; | 
 |   static constexpr size_t MinimumAlignment = 4; | 
 |   SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType)); | 
 |   assert((Align % 4) == 0 && "Constants should be aligned"); | 
 |   Ostream &Str = Ctx->getStrEmit(); | 
 |   ConstantList Pool = Ctx->getConstantPool(Traits::IceType); | 
 |  | 
 |   Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align | 
 |       << "\n" | 
 |       << "\t.align\t" << Align << "\n"; | 
 |  | 
 |   if (getFlags().getReorderPooledConstants()) { | 
    // TODO(jpp): implement pooled constant reordering.
 |     UnimplementedError(getFlags()); | 
 |   } | 
 |  | 
 |   for (Constant *C : Pool) { | 
 |     if (!C->getShouldBePooled()) { | 
 |       continue; | 
 |     } | 
 |  | 
 |     emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C)); | 
 |   } | 
 | } | 
 | } // end of anonymous namespace | 
 |  | 
 | void TargetDataARM32::lowerConstants() { | 
 |   if (getFlags().getDisableTranslation()) | 
 |     return; | 
 |   switch (getFlags().getOutFileType()) { | 
 |   case FT_Elf: { | 
 |     ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 
 |     Writer->writeConstantPool<ConstantFloat>(IceType_f32); | 
 |     Writer->writeConstantPool<ConstantDouble>(IceType_f64); | 
 |   } break; | 
 |   case FT_Asm: | 
 |   case FT_Iasm: { | 
 |     OstreamLocker _(Ctx); | 
 |     emitConstantPool<float>(Ctx); | 
 |     emitConstantPool<double>(Ctx); | 
 |     break; | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | void TargetDataARM32::lowerJumpTables() { | 
 |   if (getFlags().getDisableTranslation()) | 
 |     return; | 
 |   switch (getFlags().getOutFileType()) { | 
 |   case FT_Elf: | 
 |     if (!Ctx->getJumpTables().empty()) { | 
 |       llvm::report_fatal_error("ARM32 does not support jump tables yet."); | 
 |     } | 
 |     break; | 
 |   case FT_Asm: | 
 |     // Already emitted from Cfg | 
 |     break; | 
 |   case FT_Iasm: { | 
 |     // TODO(kschimpf): Fill this in when we get more information. | 
 |     break; | 
 |   } | 
 |   } | 
 | } | 
 |  | 
 | TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) | 
 |     : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {} | 
 |  | 
 | void TargetHeaderARM32::lower() { | 
 |   OstreamLocker _(Ctx); | 
 |   Ostream &Str = Ctx->getStrEmit(); | 
 |   Str << ".syntax unified\n"; | 
 |   // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of | 
 |   // "Addenda to, and Errata in the ABI for the ARM architecture" | 
 |   // http://infocenter.arm.com | 
 |   //                  /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf | 
 |   // | 
  // Tag_conformance should be emitted first in a file-scope sub-subsection
 |   // of the first public subsection of the attributes. | 
 |   Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n"; | 
  // Chromebooks are at least Cortex-A15, but we target Cortex-A9 for wider
  // compatibility. For some reason, the LLVM ARM asm parser has the .cpu
  // directive override the mattr specified on the command line, so to test
  // hwdiv we need to set the .cpu directive higher (we can't just rely on
  // --mattr=...).
 |   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 
 |     Str << ".cpu    cortex-a15\n"; | 
 |   } else { | 
 |     Str << ".cpu    cortex-a9\n"; | 
 |   } | 
 |   Str << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n" | 
 |       << ".eabi_attribute 7, 65   @ Tag_CPU_arch_profile: App profile\n"; | 
 |   Str << ".eabi_attribute 8, 1    @ Tag_ARM_ISA_use: Yes\n" | 
 |       << ".eabi_attribute 9, 2    @ Tag_THUMB_ISA_use: Thumb-2\n"; | 
 |   Str << ".fpu    neon\n" | 
 |       << ".eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use: permit directly\n" | 
 |       << ".eabi_attribute 20, 1   @ Tag_ABI_FP_denormal\n" | 
 |       << ".eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions\n" | 
 |       << ".eabi_attribute 23, 3   @ Tag_ABI_FP_number_model: IEEE 754\n" | 
 |       << ".eabi_attribute 34, 1   @ Tag_CPU_unaligned_access\n" | 
 |       << ".eabi_attribute 24, 1   @ Tag_ABI_align_needed: 8-byte\n" | 
 |       << ".eabi_attribute 25, 1   @ Tag_ABI_align_preserved: 8-byte\n" | 
 |       << ".eabi_attribute 28, 1   @ Tag_ABI_VFP_args\n" | 
 |       << ".eabi_attribute 36, 1   @ Tag_FP_HP_extension\n" | 
 |       << ".eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format\n" | 
 |       << ".eabi_attribute 42, 1   @ Tag_MPextension_use\n" | 
 |       << ".eabi_attribute 68, 1   @ Tag_Virtualization_use\n"; | 
 |   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 
 |     Str << ".eabi_attribute 44, 2   @ Tag_DIV_use\n"; | 
 |   } | 
 |   // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 
 |   // However, for compatibility with current NaCl LLVM, don't claim that. | 
 |   Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n"; | 
 | } | 
 |  | 
 | SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 
 | SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 
 | SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 
 |  | 
 | } // end of namespace ARM32 | 
 | } // end of namespace Ice |