| //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the AArch64-specific support for the FastISel class. Some |
| // of the target-specific code is generated by tablegen in the file |
| // AArch64GenFastISel.inc, which is #included here. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AArch64.h" |
| #include "AArch64CallingConvention.h" |
| #include "AArch64RegisterInfo.h" |
| #include "AArch64Subtarget.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "Utils/AArch64BaseInfo.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Analysis/BranchProbabilityInfo.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/FastISel.h" |
| #include "llvm/CodeGen/FunctionLoweringInfo.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineConstantPool.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Argument.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GetElementPtrTypeIterator.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/Operator.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCInstrDesc.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSymbol.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <iterator> |
| #include <utility> |
| |
| using namespace llvm; |
| |
| namespace { |
| |
| class AArch64FastISel final : public FastISel { |
| class Address { |
| public: |
| using BaseKind = enum { |
| RegBase, |
| FrameIndexBase |
| }; |
| |
| private: |
| BaseKind Kind = RegBase; |
| AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; |
| union { |
| unsigned Reg; |
| int FI; |
| } Base; |
| unsigned OffsetReg = 0; |
| unsigned Shift = 0; |
| int64_t Offset = 0; |
| const GlobalValue *GV = nullptr; |
| |
| public: |
| Address() { Base.Reg = 0; } |
| |
| void setKind(BaseKind K) { Kind = K; } |
| BaseKind getKind() const { return Kind; } |
| void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } |
| AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } |
| bool isRegBase() const { return Kind == RegBase; } |
| bool isFIBase() const { return Kind == FrameIndexBase; } |
| |
| void setReg(unsigned Reg) { |
| assert(isRegBase() && "Invalid base register access!"); |
| Base.Reg = Reg; |
| } |
| |
| unsigned getReg() const { |
| assert(isRegBase() && "Invalid base register access!"); |
| return Base.Reg; |
| } |
| |
| void setOffsetReg(unsigned Reg) { |
| OffsetReg = Reg; |
| } |
| |
| unsigned getOffsetReg() const { |
| return OffsetReg; |
| } |
| |
| void setFI(unsigned FI) { |
| assert(isFIBase() && "Invalid base frame index access!"); |
| Base.FI = FI; |
| } |
| |
| unsigned getFI() const { |
| assert(isFIBase() && "Invalid base frame index access!"); |
| return Base.FI; |
| } |
| |
| void setOffset(int64_t O) { Offset = O; } |
| int64_t getOffset() { return Offset; } |
| void setShift(unsigned S) { Shift = S; } |
| unsigned getShift() { return Shift; } |
| |
| void setGlobalValue(const GlobalValue *G) { GV = G; } |
| const GlobalValue *getGlobalValue() { return GV; } |
| }; |
| |
| /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
| /// make the right decision when generating code for different targets. |
| const AArch64Subtarget *Subtarget; |
| LLVMContext *Context; |
| |
| bool fastLowerArguments() override; |
| bool fastLowerCall(CallLoweringInfo &CLI) override; |
| bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; |
| |
| private: |
| // Selection routines. |
| bool selectAddSub(const Instruction *I); |
| bool selectLogicalOp(const Instruction *I); |
| bool selectLoad(const Instruction *I); |
| bool selectStore(const Instruction *I); |
| bool selectBranch(const Instruction *I); |
| bool selectIndirectBr(const Instruction *I); |
| bool selectCmp(const Instruction *I); |
| bool selectSelect(const Instruction *I); |
| bool selectFPExt(const Instruction *I); |
| bool selectFPTrunc(const Instruction *I); |
| bool selectFPToInt(const Instruction *I, bool Signed); |
| bool selectIntToFP(const Instruction *I, bool Signed); |
| bool selectRem(const Instruction *I, unsigned ISDOpcode); |
| bool selectRet(const Instruction *I); |
| bool selectTrunc(const Instruction *I); |
| bool selectIntExt(const Instruction *I); |
| bool selectMul(const Instruction *I); |
| bool selectShift(const Instruction *I); |
| bool selectBitCast(const Instruction *I); |
| bool selectFRem(const Instruction *I); |
| bool selectSDiv(const Instruction *I); |
| bool selectGetElementPtr(const Instruction *I); |
| bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); |
| |
| // Utility helper routines. |
| bool isTypeLegal(Type *Ty, MVT &VT); |
| bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); |
| bool isValueAvailable(const Value *V) const; |
| bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); |
| bool computeCallAddress(const Value *V, Address &Addr); |
| bool simplifyAddress(Address &Addr, MVT VT); |
| void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, |
| MachineMemOperand::Flags Flags, |
| unsigned ScaleFactor, MachineMemOperand *MMO); |
| bool isMemCpySmall(uint64_t Len, unsigned Alignment); |
| bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, |
| unsigned Alignment); |
| bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, |
| const Value *Cond); |
| bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); |
| bool optimizeSelect(const SelectInst *SI); |
| std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); |
| |
| // Emit helper routines. |
| unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
| const Value *RHS, bool SetFlags = false, |
| bool WantResult = true, bool IsZExt = false); |
| unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
| bool SetFlags = false, bool WantResult = true); |
| unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, uint64_t Imm, bool SetFlags = false, |
| bool WantResult = true); |
| unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
| AArch64_AM::ShiftExtendType ShiftType, |
| uint64_t ShiftImm, bool SetFlags = false, |
| bool WantResult = true); |
| unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
| AArch64_AM::ShiftExtendType ExtType, |
| uint64_t ShiftImm, bool SetFlags = false, |
| bool WantResult = true); |
| |
| // Emit functions. |
| bool emitCompareAndBranch(const BranchInst *BI); |
| bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); |
| bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); |
| bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); |
| bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); |
| unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, |
| MachineMemOperand *MMO = nullptr); |
| bool emitStore(MVT VT, unsigned SrcReg, Address Addr, |
| MachineMemOperand *MMO = nullptr); |
| bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, |
| MachineMemOperand *MMO = nullptr); |
| unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); |
| unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); |
| unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags = false, bool WantResult = true, |
| bool IsZExt = false); |
| unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm); |
| unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags = false, bool WantResult = true, |
| bool IsZExt = false); |
| unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
| unsigned RHSReg, bool RHSIsKill, bool WantResult = true); |
| unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
| unsigned RHSReg, bool RHSIsKill, |
| AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, |
| bool WantResult = true); |
| unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, |
| const Value *RHS); |
| unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, uint64_t Imm); |
| unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
| uint64_t ShiftImm); |
| unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); |
| unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
| unsigned Op1, bool Op1IsKill); |
| unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
| unsigned Op1, bool Op1IsKill); |
| unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
| unsigned Op1, bool Op1IsKill); |
| unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
| unsigned Op1Reg, bool Op1IsKill); |
| unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
| uint64_t Imm, bool IsZExt = true); |
| unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
| unsigned Op1Reg, bool Op1IsKill); |
| unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
| uint64_t Imm, bool IsZExt = true); |
| unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
| unsigned Op1Reg, bool Op1IsKill); |
| unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
| uint64_t Imm, bool IsZExt = false); |
| |
| unsigned materializeInt(const ConstantInt *CI, MVT VT); |
| unsigned materializeFP(const ConstantFP *CFP, MVT VT); |
| unsigned materializeGV(const GlobalValue *GV); |
| |
| // Call handling routines. |
| private: |
| CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; |
| bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, |
| unsigned &NumBytes); |
| bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); |
| |
| public: |
| // Backend specific FastISel code. |
| unsigned fastMaterializeAlloca(const AllocaInst *AI) override; |
| unsigned fastMaterializeConstant(const Constant *C) override; |
| unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; |
| |
| explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, |
| const TargetLibraryInfo *LibInfo) |
| : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { |
| Subtarget = |
| &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); |
| Context = &FuncInfo.Fn->getContext(); |
| } |
| |
| bool fastSelectInstruction(const Instruction *I) override; |
| |
| #include "AArch64GenFastISel.inc" |
| }; |
| |
| } // end anonymous namespace |
| |
| /// Check if the sign-/zero-extend will be a noop. |
| static bool isIntExtFree(const Instruction *I) { |
| assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
| "Unexpected integer extend instruction."); |
| assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
| "Unexpected value type."); |
| bool IsZExt = isa<ZExtInst>(I); |
| |
| if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) |
| if (LI->hasOneUse()) |
| return true; |
| |
| if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) |
| if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) |
| return true; |
| |
| return false; |
| } |
| |
| /// Determine the implicit scale factor that is applied by a memory |
| /// operation for a given value type. |
| static unsigned getImplicitScaleFactor(MVT VT) { |
| switch (VT.SimpleTy) { |
| default: |
| return 0; // invalid |
| case MVT::i1: // fall-through |
| case MVT::i8: |
| return 1; |
| case MVT::i16: |
| return 2; |
| case MVT::i32: // fall-through |
| case MVT::f32: |
| return 4; |
| case MVT::i64: // fall-through |
| case MVT::f64: |
| return 8; |
| } |
| } |
| |
| CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
| if (CC == CallingConv::WebKit_JS) |
| return CC_AArch64_WebKit_JS; |
| if (CC == CallingConv::GHC) |
| return CC_AArch64_GHC; |
| if (CC == CallingConv::CFGuard_Check) |
| return CC_AArch64_Win64_CFGuard_Check; |
| return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; |
| } |
| |
| unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
| assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
| "Alloca should always return a pointer."); |
| |
| // Don't handle dynamic allocas. |
| if (!FuncInfo.StaticAllocaMap.count(AI)) |
| return 0; |
| |
| DenseMap<const AllocaInst *, int>::iterator SI = |
| FuncInfo.StaticAllocaMap.find(AI); |
| |
| if (SI != FuncInfo.StaticAllocaMap.end()) { |
| unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
| ResultReg) |
| .addFrameIndex(SI->second) |
| .addImm(0) |
| .addImm(0); |
| return ResultReg; |
| } |
| |
| return 0; |
| } |
| |
| unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
| if (VT > MVT::i64) |
| return 0; |
| |
| if (!CI->isZero()) |
| return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); |
| |
| // Create a copy from the zero register to materialize a "0" value. |
| const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass |
| : &AArch64::GPR32RegClass; |
| unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| unsigned ResultReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), |
| ResultReg).addReg(ZeroReg, getKillRegState(true)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { |
| // Positive zero (+0.0) has to be materialized with a fmov from the zero |
| // register, because the immediate version of fmov cannot encode zero. |
| if (CFP->isNullValue()) |
| return fastMaterializeFloatZero(CFP); |
| |
| if (VT != MVT::f32 && VT != MVT::f64) |
| return 0; |
| |
| const APFloat Val = CFP->getValueAPF(); |
| bool Is64Bit = (VT == MVT::f64); |
| // This checks to see if we can use FMOV instructions to materialize the |
| // constant; otherwise we have to materialize it via the constant pool. |
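| // FMOV (immediate) can only encode a limited set of floating-point values; |
| // getFP32Imm/getFP64Imm return -1 when the value has no such encoding. |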
| int Imm = |
| Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); |
| if (Imm != -1) { |
| unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; |
| return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); |
| } |
| |
| // For the MachO large code model, materialize the FP constant in code. |
| if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { |
| unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
| const TargetRegisterClass *RC = Is64Bit ? |
| &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| |
| unsigned TmpReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) |
| .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); |
| |
| unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(TmpReg, getKillRegState(true)); |
| |
| return ResultReg; |
| } |
| |
| // Materialize via constant pool. MachineConstantPool wants an explicit |
| // alignment. |
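| // The constant-pool entry is addressed with ADRP (the 4KiB page of the |
| // entry) plus the low 12 bits of its address folded into the load's offset. |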
| unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); |
| if (Align == 0) |
| Align = DL.getTypeAllocSize(CFP->getType()); |
| |
| unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); |
| unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
| ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); |
| |
| unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; |
| unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
| .addReg(ADRPReg) |
| .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { |
| // We can't handle thread-local variables quickly yet. |
| if (GV->isThreadLocal()) |
| return 0; |
| |
| // MachO still uses GOT for large code-model accesses, but ELF requires |
| // movz/movk sequences, which FastISel doesn't handle yet. |
| if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) |
| return 0; |
| |
| unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); |
| |
| EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); |
| if (!DestEVT.isSimple()) |
| return 0; |
| |
| unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
| unsigned ResultReg; |
| |
| if (OpFlags & AArch64II::MO_GOT) { |
| // ADRP + LDRX |
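| // The GOT slot holds the address of GV: ADRP forms the page address of the |
| // slot and the load below reads the pointer out of it. |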
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
| ADRPReg) |
| .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); |
| |
| unsigned LdrOpc; |
| if (Subtarget->isTargetILP32()) { |
| ResultReg = createResultReg(&AArch64::GPR32RegClass); |
| LdrOpc = AArch64::LDRWui; |
| } else { |
| ResultReg = createResultReg(&AArch64::GPR64RegClass); |
| LdrOpc = AArch64::LDRXui; |
| } |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), |
| ResultReg) |
| .addReg(ADRPReg) |
| .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | |
| AArch64II::MO_NC | OpFlags); |
| if (!Subtarget->isTargetILP32()) |
| return ResultReg; |
| |
| // LDRWui produces a 32-bit register, but pointers in-register are 64 bits, |
| // so we must extend the result on ILP32. |
| unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::SUBREG_TO_REG)) |
| .addDef(Result64) |
| .addImm(0) |
| .addReg(ResultReg, RegState::Kill) |
| .addImm(AArch64::sub_32); |
| return Result64; |
| } else { |
| // ADRP + ADDX |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
| ADRPReg) |
| .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); |
| |
| ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
| ResultReg) |
| .addReg(ADRPReg) |
| .addGlobalAddress(GV, 0, |
| AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) |
| .addImm(0); |
| } |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
| EVT CEVT = TLI.getValueType(DL, C->getType(), true); |
| |
| // Only handle simple types. |
| if (!CEVT.isSimple()) |
| return 0; |
| MVT VT = CEVT.getSimpleVT(); |
| // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, |
| // 'null' pointers need somewhat special treatment. |
| if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) { |
| (void)CPN; |
| assert(CPN->getType()->getPointerAddressSpace() == 0 && |
| "Unexpected address space"); |
| assert(VT == MVT::i64 && "Expected 64-bit pointers"); |
| return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); |
| } |
| |
| if (const auto *CI = dyn_cast<ConstantInt>(C)) |
| return materializeInt(CI, VT); |
| else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) |
| return materializeFP(CFP, VT); |
| else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) |
| return materializeGV(GV); |
| |
| return 0; |
| } |
| |
| unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { |
| assert(CFP->isNullValue() && |
| "Floating-point constant is not a positive zero."); |
| MVT VT; |
| if (!isTypeLegal(CFP->getType(), VT)) |
| return 0; |
| |
| if (VT != MVT::f32 && VT != MVT::f64) |
| return 0; |
| |
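| // +0.0 is the all-zero bit pattern, so an fmov from WZR/XZR materializes it |
| // without a load. |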
| bool Is64Bit = (VT == MVT::f64); |
| unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; |
| return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); |
| } |
| |
| /// Check if the multiply is by a power-of-2 constant. |
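| /// Such multiplies can later be folded as a left shift, e.g. into the scaled |
| /// register addressing mode or into an add/sub with a shifted register. |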
| static bool isMulPowOf2(const Value *I) { |
| if (const auto *MI = dyn_cast<MulOperator>(I)) { |
| if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) |
| if (C->getValue().isPowerOf2()) |
| return true; |
| if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) |
| if (C->getValue().isPowerOf2()) |
| return true; |
| } |
| return false; |
| } |
| |
| // Computes the address to get to an object. |
| bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) |
| { |
| const User *U = nullptr; |
| unsigned Opcode = Instruction::UserOp1; |
| if (const Instruction *I = dyn_cast<Instruction>(Obj)) { |
| // Don't walk into other basic blocks unless the object is an alloca from |
| // another block, otherwise it may not have a virtual register assigned. |
| if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || |
| FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
| Opcode = I->getOpcode(); |
| U = I; |
| } |
| } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { |
| Opcode = C->getOpcode(); |
| U = C; |
| } |
| |
| if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) |
| if (Ty->getAddressSpace() > 255) |
| // Fast instruction selection doesn't support the special |
| // address spaces. |
| return false; |
| |
| switch (Opcode) { |
| default: |
| break; |
| case Instruction::BitCast: |
| // Look through bitcasts. |
| return computeAddress(U->getOperand(0), Addr, Ty); |
| |
| case Instruction::IntToPtr: |
| // Look past no-op inttoptrs. |
| if (TLI.getValueType(DL, U->getOperand(0)->getType()) == |
| TLI.getPointerTy(DL)) |
| return computeAddress(U->getOperand(0), Addr, Ty); |
| break; |
| |
| case Instruction::PtrToInt: |
| // Look past no-op ptrtoints. |
| if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
| return computeAddress(U->getOperand(0), Addr, Ty); |
| break; |
| |
| case Instruction::GetElementPtr: { |
| Address SavedAddr = Addr; |
| uint64_t TmpOffset = Addr.getOffset(); |
| |
| // Iterate through the GEP folding the constants into offsets where |
| // we can. |
| for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); |
| GTI != E; ++GTI) { |
| const Value *Op = GTI.getOperand(); |
| if (StructType *STy = GTI.getStructTypeOrNull()) { |
| const StructLayout *SL = DL.getStructLayout(STy); |
| unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); |
| TmpOffset += SL->getElementOffset(Idx); |
| } else { |
| uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); |
| while (true) { |
| if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { |
| // Constant-offset addressing. |
| TmpOffset += CI->getSExtValue() * S; |
| break; |
| } |
| if (canFoldAddIntoGEP(U, Op)) { |
| // A compatible add with a constant operand. Fold the constant. |
| ConstantInt *CI = |
| cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); |
| TmpOffset += CI->getSExtValue() * S; |
| // Iterate on the other operand. |
| Op = cast<AddOperator>(Op)->getOperand(0); |
| continue; |
| } |
| // Unsupported |
| goto unsupported_gep; |
| } |
| } |
| } |
| |
| // Try to grab the base operand now. |
| Addr.setOffset(TmpOffset); |
| if (computeAddress(U->getOperand(0), Addr, Ty)) |
| return true; |
| |
| // We failed, restore everything and try the other options. |
| Addr = SavedAddr; |
| |
| unsupported_gep: |
| break; |
| } |
| case Instruction::Alloca: { |
| const AllocaInst *AI = cast<AllocaInst>(Obj); |
| DenseMap<const AllocaInst *, int>::iterator SI = |
| FuncInfo.StaticAllocaMap.find(AI); |
| if (SI != FuncInfo.StaticAllocaMap.end()) { |
| Addr.setKind(Address::FrameIndexBase); |
| Addr.setFI(SI->second); |
| return true; |
| } |
| break; |
| } |
| case Instruction::Add: { |
| // Adds of constants are common and easy enough. |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| if (isa<ConstantInt>(LHS)) |
| std::swap(LHS, RHS); |
| |
| if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
| Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
| return computeAddress(LHS, Addr, Ty); |
| } |
| |
| Address Backup = Addr; |
| if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) |
| return true; |
| Addr = Backup; |
| |
| break; |
| } |
| case Instruction::Sub: { |
| // Subs of constants are common and easy enough. |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
| Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
| return computeAddress(LHS, Addr, Ty); |
| } |
| break; |
| } |
| case Instruction::Shl: { |
| if (Addr.getOffsetReg()) |
| break; |
| |
| const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); |
| if (!CI) |
| break; |
| |
| unsigned Val = CI->getZExtValue(); |
| if (Val < 1 || Val > 3) |
| break; |
| |
| uint64_t NumBytes = 0; |
| if (Ty && Ty->isSized()) { |
| uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
| NumBytes = NumBits / 8; |
| if (!isPowerOf2_64(NumBits)) |
| NumBytes = 0; |
| } |
| |
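| // The shift only folds into the addressing mode if it matches the width of |
| // the access, e.g. LSL #3 for an 8-byte load or store. |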
| if (NumBytes != (1ULL << Val)) |
| break; |
| |
| Addr.setShift(Val); |
| Addr.setExtendType(AArch64_AM::LSL); |
| |
| const Value *Src = U->getOperand(0); |
| if (const auto *I = dyn_cast<Instruction>(Src)) { |
| if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
| // Fold the zext or sext when it won't become a noop. |
| if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
| if (!isIntExtFree(ZE) && |
| ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Src = ZE->getOperand(0); |
| } |
| } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
| if (!isIntExtFree(SE) && |
| SE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::SXTW); |
| Src = SE->getOperand(0); |
| } |
| } |
| } |
| } |
| |
| if (const auto *AI = dyn_cast<BinaryOperator>(Src)) |
| if (AI->getOpcode() == Instruction::And) { |
| const Value *LHS = AI->getOperand(0); |
| const Value *RHS = AI->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
| if (C->getValue() == 0xffffffff) |
| std::swap(LHS, RHS); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 0xffffffff) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| unsigned Reg = getRegForValue(LHS); |
| if (!Reg) |
| return false; |
| bool RegIsKill = hasTrivialKill(LHS); |
| Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, |
| AArch64::sub_32); |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| } |
| |
| unsigned Reg = getRegForValue(Src); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| case Instruction::Mul: { |
| if (Addr.getOffsetReg()) |
| break; |
| |
| if (!isMulPowOf2(U)) |
| break; |
| |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| // Canonicalize power-of-2 value to the RHS. |
| if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(LHS, RHS); |
| |
| assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt."); |
| const auto *C = cast<ConstantInt>(RHS); |
| unsigned Val = C->getValue().logBase2(); |
| if (Val < 1 || Val > 3) |
| break; |
| |
| uint64_t NumBytes = 0; |
| if (Ty && Ty->isSized()) { |
| uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
| NumBytes = NumBits / 8; |
| if (!isPowerOf2_64(NumBits)) |
| NumBytes = 0; |
| } |
| |
| if (NumBytes != (1ULL << Val)) |
| break; |
| |
| Addr.setShift(Val); |
| Addr.setExtendType(AArch64_AM::LSL); |
| |
| const Value *Src = LHS; |
| if (const auto *I = dyn_cast<Instruction>(Src)) { |
| if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
| // Fold the zext or sext when it won't become a noop. |
| if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
| if (!isIntExtFree(ZE) && |
| ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Src = ZE->getOperand(0); |
| } |
| } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
| if (!isIntExtFree(SE) && |
| SE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::SXTW); |
| Src = SE->getOperand(0); |
| } |
| } |
| } |
| } |
| |
| unsigned Reg = getRegForValue(Src); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| case Instruction::And: { |
| if (Addr.getOffsetReg()) |
| break; |
| |
| if (!Ty || DL.getTypeSizeInBits(Ty) != 8) |
| break; |
| |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
| if (C->getValue() == 0xffffffff) |
| std::swap(LHS, RHS); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 0xffffffff) { |
| Addr.setShift(0); |
| Addr.setExtendType(AArch64_AM::LSL); |
| Addr.setExtendType(AArch64_AM::UXTW); |
| |
| unsigned Reg = getRegForValue(LHS); |
| if (!Reg) |
| return false; |
| bool RegIsKill = hasTrivialKill(LHS); |
| Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, |
| AArch64::sub_32); |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| break; |
| } |
| case Instruction::SExt: |
| case Instruction::ZExt: { |
| if (!Addr.getReg() || Addr.getOffsetReg()) |
| break; |
| |
| const Value *Src = nullptr; |
| // Fold the zext or sext when it won't become a noop. |
| if (const auto *ZE = dyn_cast<ZExtInst>(U)) { |
| if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Src = ZE->getOperand(0); |
| } |
| } else if (const auto *SE = dyn_cast<SExtInst>(U)) { |
| if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::SXTW); |
| Src = SE->getOperand(0); |
| } |
| } |
| |
| if (!Src) |
| break; |
| |
| Addr.setShift(0); |
| unsigned Reg = getRegForValue(Src); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| } // end switch |
| |
| if (Addr.isRegBase() && !Addr.getReg()) { |
| unsigned Reg = getRegForValue(Obj); |
| if (!Reg) |
| return false; |
| Addr.setReg(Reg); |
| return true; |
| } |
| |
| if (!Addr.getOffsetReg()) { |
| unsigned Reg = getRegForValue(Obj); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
| const User *U = nullptr; |
| unsigned Opcode = Instruction::UserOp1; |
| bool InMBB = true; |
| |
| if (const auto *I = dyn_cast<Instruction>(V)) { |
| Opcode = I->getOpcode(); |
| U = I; |
| InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
| } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { |
| Opcode = C->getOpcode(); |
| U = C; |
| } |
| |
| switch (Opcode) { |
| default: break; |
| case Instruction::BitCast: |
| // Look past bitcasts if its operand is in the same BB. |
| if (InMBB) |
| return computeCallAddress(U->getOperand(0), Addr); |
| break; |
| case Instruction::IntToPtr: |
| // Look past no-op inttoptrs if its operand is in the same BB. |
| if (InMBB && |
| TLI.getValueType(DL, U->getOperand(0)->getType()) == |
| TLI.getPointerTy(DL)) |
| return computeCallAddress(U->getOperand(0), Addr); |
| break; |
| case Instruction::PtrToInt: |
| // Look past no-op ptrtoints if its operand is in the same BB. |
| if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
| return computeCallAddress(U->getOperand(0), Addr); |
| break; |
| } |
| |
| if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { |
| Addr.setGlobalValue(GV); |
| return true; |
| } |
| |
| // If all else fails, try to materialize the value in a register. |
| if (!Addr.getGlobalValue()) { |
| Addr.setReg(getRegForValue(V)); |
| return Addr.getReg() != 0; |
| } |
| |
| return false; |
| } |
| |
| bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
| EVT evt = TLI.getValueType(DL, Ty, true); |
| |
| if (Subtarget->isTargetILP32() && Ty->isPointerTy()) |
| return false; |
| |
| // Only handle simple types. |
| if (evt == MVT::Other || !evt.isSimple()) |
| return false; |
| VT = evt.getSimpleVT(); |
| |
| // This is a legal type, but it's not something we handle in fast-isel. |
| if (VT == MVT::f128) |
| return false; |
| |
| // Handle all other legal types, i.e. a register that will directly hold this |
| // value. |
| return TLI.isTypeLegal(VT); |
| } |
| |
| /// Determine if the value type is supported by FastISel. |
| /// |
| /// FastISel for AArch64 can handle more value types than are legal. This adds |
| /// simple value types such as i1, i8, and i16. |
| bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
| if (Ty->isVectorTy() && !IsVectorAllowed) |
| return false; |
| |
| if (isTypeLegal(Ty, VT)) |
| return true; |
| |
| // If this is a type that can be sign- or zero-extended to a basic operation, |
| // go ahead and accept it now. |
| if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) |
| return true; |
| |
| return false; |
| } |
| |
| bool AArch64FastISel::isValueAvailable(const Value *V) const { |
| if (!isa<Instruction>(V)) |
| return true; |
| |
| const auto *I = cast<Instruction>(V); |
| return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; |
| } |
| |
| bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { |
| if (Subtarget->isTargetILP32()) |
| return false; |
| |
| unsigned ScaleFactor = getImplicitScaleFactor(VT); |
| if (!ScaleFactor) |
| return false; |
| |
| bool ImmediateOffsetNeedsLowering = false; |
| bool RegisterOffsetNeedsLowering = false; |
| int64_t Offset = Addr.getOffset(); |
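| // A negative or unaligned offset must use the unscaled form, which only |
| // accepts a signed 9-bit immediate; the scaled form accepts an unsigned |
| // 12-bit immediate in units of the access size. |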
| if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) |
| ImmediateOffsetNeedsLowering = true; |
| else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && |
| !isUInt<12>(Offset / ScaleFactor)) |
| ImmediateOffsetNeedsLowering = true; |
| |
| // Cannot encode an offset register and an immediate offset in the same |
| // instruction. Fold the immediate offset into the load/store instruction and |
| // emit an additional add to take care of the offset register. |
| if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) |
| RegisterOffsetNeedsLowering = true; |
| |
| // Cannot encode zero register as base. |
| if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) |
| RegisterOffsetNeedsLowering = true; |
| |
| // If the base is a frame index and either the immediate offset needs |
| // lowering or there is an offset register, put the alloca address into a |
| // register, set the base kind back to register and continue. This should |
| // almost never happen. |
| if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) |
| { |
| unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
| ResultReg) |
| .addFrameIndex(Addr.getFI()) |
| .addImm(0) |
| .addImm(0); |
| Addr.setKind(Address::RegBase); |
| Addr.setReg(ResultReg); |
| } |
| |
| if (RegisterOffsetNeedsLowering) { |
| unsigned ResultReg = 0; |
| if (Addr.getReg()) { |
| if (Addr.getExtendType() == AArch64_AM::SXTW || |
| Addr.getExtendType() == AArch64_AM::UXTW ) |
| ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
| /*TODO:IsKill=*/false, Addr.getOffsetReg(), |
| /*TODO:IsKill=*/false, Addr.getExtendType(), |
| Addr.getShift()); |
| else |
| ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
| /*TODO:IsKill=*/false, Addr.getOffsetReg(), |
| /*TODO:IsKill=*/false, AArch64_AM::LSL, |
| Addr.getShift()); |
| } else { |
| if (Addr.getExtendType() == AArch64_AM::UXTW) |
| ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
| /*Op0IsKill=*/false, Addr.getShift(), |
| /*IsZExt=*/true); |
| else if (Addr.getExtendType() == AArch64_AM::SXTW) |
| ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
| /*Op0IsKill=*/false, Addr.getShift(), |
| /*IsZExt=*/false); |
| else |
| ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), |
| /*Op0IsKill=*/false, Addr.getShift()); |
| } |
| if (!ResultReg) |
| return false; |
| |
| Addr.setReg(ResultReg); |
| Addr.setOffsetReg(0); |
| Addr.setShift(0); |
| Addr.setExtendType(AArch64_AM::InvalidShiftExtend); |
| } |
| |
| // Since the offset is too large for the load/store instruction, get the |
| // reg+offset into a register. |
| if (ImmediateOffsetNeedsLowering) { |
| unsigned ResultReg; |
| if (Addr.getReg()) |
| // Try to fold the immediate into the add instruction. |
| ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); |
| else |
| ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); |
| |
| if (!ResultReg) |
| return false; |
| Addr.setReg(ResultReg); |
| Addr.setOffset(0); |
| } |
| return true; |
| } |
| |
| void AArch64FastISel::addLoadStoreOperands(Address &Addr, |
| const MachineInstrBuilder &MIB, |
| MachineMemOperand::Flags Flags, |
| unsigned ScaleFactor, |
| MachineMemOperand *MMO) { |
| int64_t Offset = Addr.getOffset() / ScaleFactor; |
| // Frame base works a bit differently. Handle it separately. |
| if (Addr.isFIBase()) { |
| int FI = Addr.getFI(); |
| // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size |
| // and alignment should be based on the VT. |
| MMO = FuncInfo.MF->getMachineMemOperand( |
| MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, |
| MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); |
| // Now add the rest of the operands. |
| MIB.addFrameIndex(FI).addImm(Offset); |
| } else { |
| assert(Addr.isRegBase() && "Unexpected address kind."); |
| const MCInstrDesc &II = MIB->getDesc(); |
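| // Stores have no definitions and take the value to store as their first |
| // operand, so the address operands start one slot later than for loads. |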
| unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; |
| Addr.setReg( |
| constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); |
| Addr.setOffsetReg( |
| constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); |
| if (Addr.getOffsetReg()) { |
| assert(Addr.getOffset() == 0 && "Unexpected offset"); |
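| // The register-offset form takes the base register, the offset register, a |
| // flag selecting sign extension (SXTW/SXTX), and a flag for whether the |
| // offset is scaled by the access size. |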
| bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || |
| Addr.getExtendType() == AArch64_AM::SXTX; |
| MIB.addReg(Addr.getReg()); |
| MIB.addReg(Addr.getOffsetReg()); |
| MIB.addImm(IsSigned); |
| MIB.addImm(Addr.getShift() != 0); |
| } else |
| MIB.addReg(Addr.getReg()).addImm(Offset); |
| } |
| |
| if (MMO) |
| MIB.addMemOperand(MMO); |
| } |
| |
| unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
| const Value *RHS, bool SetFlags, |
| bool WantResult, bool IsZExt) { |
| AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; |
| bool NeedExtend = false; |
| switch (RetVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| NeedExtend = true; |
| break; |
| case MVT::i8: |
| NeedExtend = true; |
| ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; |
| break; |
| case MVT::i16: |
| NeedExtend = true; |
| ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; |
| break; |
| case MVT::i32: // fall-through |
| case MVT::i64: |
| break; |
| } |
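| // There are no 8/16-bit add/sub instructions, so operate on at least i32 and |
| // extend the operands first when needed. |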
| MVT SrcVT = RetVT; |
| RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); |
| |
| // Canonicalize immediates to the RHS first. |
| if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize mul by power of 2 to the RHS. |
| if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (isMulPowOf2(LHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize shift immediate to the RHS. |
| if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) |
| if (isa<ConstantInt>(SI->getOperand(1))) |
| if (SI->getOpcode() == Instruction::Shl || |
| SI->getOpcode() == Instruction::LShr || |
| SI->getOpcode() == Instruction::AShr ) |
| std::swap(LHS, RHS); |
| |
| unsigned LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return 0; |
| bool LHSIsKill = hasTrivialKill(LHS); |
| |
| if (NeedExtend) |
| LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); |
| |
| unsigned ResultReg = 0; |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
| uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); |
| if (C->isNegative()) |
| ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, |
| SetFlags, WantResult); |
| else |
| ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, |
| WantResult); |
| } else if (const auto *C = dyn_cast<Constant>(RHS)) |
| if (C->isNullValue()) |
| ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, |
| WantResult); |
| |
| if (ResultReg) |
| return ResultReg; |
| |
| // Only extend the RHS within the instruction if there is a valid extend type. |
| if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && |
| isValueAvailable(RHS)) { |
| if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) |
| if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) |
| if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { |
| unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
| return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, ExtendType, C->getZExtValue(), |
| SetFlags, WantResult); |
| } |
| unsigned RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(RHS); |
| return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
| ExtendType, 0, SetFlags, WantResult); |
| } |
| |
| // Check if the mul can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (isMulPowOf2(RHS)) { |
| const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
| const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(MulLHS, MulRHS); |
| |
| assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
| uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
| unsigned RHSReg = getRegForValue(MulLHS); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(MulLHS); |
| ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, |
| WantResult); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| |
| // Check if the shift can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { |
| if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
| AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; |
| switch (SI->getOpcode()) { |
| default: break; |
| case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; |
| case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; |
| case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; |
| } |
| uint64_t ShiftVal = C->getZExtValue(); |
| if (ShiftType != AArch64_AM::InvalidShiftExtend) { |
| unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
| ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, ShiftType, ShiftVal, SetFlags, |
| WantResult); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| } |
| } |
| |
| unsigned RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(RHS); |
| |
| if (NeedExtend) |
| RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); |
| |
| return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
| SetFlags, WantResult); |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, |
| bool RHSIsKill, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && RHSReg && "Invalid register number."); |
| |
| if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || |
| RHSReg == AArch64::SP || RHSReg == AArch64::WSP) |
| return 0; |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWrr, AArch64::SUBXrr }, |
| { AArch64::ADDWrr, AArch64::ADDXrr } }, |
| { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
| { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
| .addReg(LHSReg, getKillRegState(LHSIsKill)) |
| .addReg(RHSReg, getKillRegState(RHSIsKill)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, uint64_t Imm, |
| bool SetFlags, bool WantResult) { |
| assert(LHSReg && "Invalid register number."); |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
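| // Add/sub immediates are 12 bits wide and may optionally be shifted left by |
| // 12 bits. |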
| unsigned ShiftImm; |
| if (isUInt<12>(Imm)) |
| ShiftImm = 0; |
| else if ((Imm & 0xfff000) == Imm) { |
| ShiftImm = 12; |
| Imm >>= 12; |
| } else |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWri, AArch64::SUBXri }, |
| { AArch64::ADDWri, AArch64::ADDXri } }, |
| { { AArch64::SUBSWri, AArch64::SUBSXri }, |
| { AArch64::ADDSWri, AArch64::ADDSXri } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC; |
| if (SetFlags) |
| RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| else |
| RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
| .addReg(LHSReg, getKillRegState(LHSIsKill)) |
| .addImm(Imm) |
| .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, |
| bool RHSIsKill, |
| AArch64_AM::ShiftExtendType ShiftType, |
| uint64_t ShiftImm, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && RHSReg && "Invalid register number."); |
| assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
| RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
| // Don't deal with undefined shifts. |
| if (ShiftImm >= RetVT.getSizeInBits()) |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWrs, AArch64::SUBXrs }, |
| { AArch64::ADDWrs, AArch64::ADDXrs } }, |
| { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
| { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
| .addReg(LHSReg, getKillRegState(LHSIsKill)) |
| .addReg(RHSReg, getKillRegState(RHSIsKill)) |
| .addImm(getShifterImm(ShiftType, ShiftImm)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, |
| bool RHSIsKill, |
| AArch64_AM::ShiftExtendType ExtType, |
| uint64_t ShiftImm, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && RHSReg && "Invalid register number."); |
| assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
| RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
| if (ShiftImm >= 4) |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWrx, AArch64::SUBXrx }, |
| { AArch64::ADDWrx, AArch64::ADDXrx } }, |
| { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
| { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC = nullptr; |
| if (SetFlags) |
| RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| else |
| RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
| .addReg(LHSReg, getKillRegState(LHSIsKill)) |
| .addReg(RHSReg, getKillRegState(RHSIsKill)) |
| .addImm(getArithExtendImm(ExtType, ShiftImm)); |
| return ResultReg; |
| } |
| |
| bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
| Type *Ty = LHS->getType(); |
| EVT EVT = TLI.getValueType(DL, Ty, true); |
| if (!EVT.isSimple()) |
| return false; |
| MVT VT = EVT.getSimpleVT(); |
| |
| switch (VT.SimpleTy) { |
| default: |
| return false; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| case MVT::i64: |
| return emitICmp(VT, LHS, RHS, IsZExt); |
| case MVT::f32: |
| case MVT::f64: |
| return emitFCmp(VT, LHS, RHS); |
| } |
| } |
| |
| bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool IsZExt) { |
| return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
| IsZExt) != 0; |
| } |
| |
| bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
| uint64_t Imm) { |
| return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, |
| /*SetFlags=*/true, /*WantResult=*/false) != 0; |
| } |
| |
| bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
| if (RetVT != MVT::f32 && RetVT != MVT::f64) |
| return false; |
| |
| // Check to see if the 2nd operand is a constant that we can encode directly |
| // in the compare. |
| bool UseImm = false; |
| if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) |
| if (CFP->isZero() && !CFP->isNegative()) |
| UseImm = true; |
| |
| unsigned LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return false; |
| bool LHSIsKill = hasTrivialKill(LHS); |
| |
| if (UseImm) { |
| unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
| .addReg(LHSReg, getKillRegState(LHSIsKill)); |
| return true; |
| } |
| |
| unsigned RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return false; |
| bool RHSIsKill = hasTrivialKill(RHS); |
| |
| unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
| .addReg(LHSReg, getKillRegState(LHSIsKill)) |
| .addReg(RHSReg, getKillRegState(RHSIsKill)); |
| return true; |
| } |
| |
| unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags, bool WantResult, bool IsZExt) { |
| return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
| IsZExt); |
| } |
| |
| /// This method is a wrapper to simplify add emission. |
| /// |
| /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
| /// that fails, then try to materialize the immediate into a register and use |
| /// emitAddSub_rr instead. |
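| /// For example, an immediate such as 0x1001 has no (optionally shifted) |
| /// 12-bit encoding, so it is materialized into a register and added with the |
| /// register-register form instead. |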
| unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, |
| int64_t Imm) { |
| unsigned ResultReg; |
| if (Imm < 0) |
| ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); |
| else |
| ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); |
| |
| if (ResultReg) |
| return ResultReg; |
| |
| unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); |
| if (!CReg) |
| return 0; |
| |
| ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags, bool WantResult, bool IsZExt) { |
| return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
| IsZExt); |
| } |
| |
| unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, |
| bool RHSIsKill, bool WantResult) { |
| return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, /*SetFlags=*/true, WantResult); |
| } |
| |
| unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, |
| bool LHSIsKill, unsigned RHSReg, |
| bool RHSIsKill, |
| AArch64_AM::ShiftExtendType ShiftType, |
| uint64_t ShiftImm, bool WantResult) { |
| return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, |
| WantResult); |
| } |
| |
| unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, |
| const Value *LHS, const Value *RHS) { |
| // Canonicalize immediates to the RHS first. |
| if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize mul by power-of-2 to the RHS. |
| if (LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (isMulPowOf2(LHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize shift immediate to the RHS. |
| if (LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (const auto *SI = dyn_cast<ShlOperator>(LHS)) |
| if (isa<ConstantInt>(SI->getOperand(1))) |
| std::swap(LHS, RHS); |
| |
| unsigned LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return 0; |
| bool LHSIsKill = hasTrivialKill(LHS); |
| |
| unsigned ResultReg = 0; |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
| uint64_t Imm = C->getZExtValue(); |
| ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); |
| } |
| if (ResultReg) |
| return ResultReg; |
| |
| // Check if the mul can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (isMulPowOf2(RHS)) { |
| const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
| const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(MulLHS, MulRHS); |
| |
| assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
| uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
| |
| unsigned RHSReg = getRegForValue(MulLHS); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(MulLHS); |
| ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, ShiftVal); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| |
| // Check if the shift can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (const auto *SI = dyn_cast<ShlOperator>(RHS)) |
| if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
| uint64_t ShiftVal = C->getZExtValue(); |
| unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
| ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, |
| RHSIsKill, ShiftVal); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| |
| unsigned RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return 0; |
| bool RHSIsKill = hasTrivialKill(RHS); |
| |
| MVT VT = std::max(MVT::i32, RetVT.SimpleTy); |
| ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
| if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
| uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
| } |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
| unsigned LHSReg, bool LHSIsKill, |
| uint64_t Imm) { |
| static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
| "ISD nodes are not consecutive!"); |
| static const unsigned OpcTable[3][2] = { |
| { AArch64::ANDWri, AArch64::ANDXri }, |
| { AArch64::ORRWri, AArch64::ORRXri }, |
| { AArch64::EORWri, AArch64::EORXri } |
| }; |
| const TargetRegisterClass *RC; |
| unsigned Opc; |
| unsigned RegSize; |
| switch (RetVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: { |
| unsigned Idx = ISDOpc - ISD::AND; |
| Opc = OpcTable[Idx][0]; |
| RC = &AArch64::GPR32spRegClass; |
| RegSize = 32; |
| break; |
| } |
| case MVT::i64: |
| Opc = OpcTable[ISDOpc - ISD::AND][1]; |
| RC = &AArch64::GPR64spRegClass; |
| RegSize = 64; |
| break; |
| } |
| |
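| // Only AArch64 bitmask immediates (rotated runs of set bits, possibly |
| // replicated) can be encoded directly; e.g. 0xff qualifies but 0xfa does not |
| // and has to go through the register form instead. |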
| if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) |
| return 0; |
| |
| unsigned ResultReg = |
| fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, |
| AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); |
| if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { |
| uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
| } |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
| unsigned LHSReg, bool LHSIsKill, |
| unsigned RHSReg, bool RHSIsKill, |
| uint64_t ShiftImm) { |
| static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
| "ISD nodes are not consecutive!"); |
| static const unsigned OpcTable[3][2] = { |
| { AArch64::ANDWrs, AArch64::ANDXrs }, |
| { AArch64::ORRWrs, AArch64::ORRXrs }, |
| { AArch64::EORWrs, AArch64::EORXrs } |
| }; |
| |
| // Don't deal with undefined shifts. |
| if (ShiftImm >= RetVT.getSizeInBits()) |
| return 0; |
| |
| const TargetRegisterClass *RC; |
| unsigned Opc; |
| switch (RetVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| Opc = OpcTable[ISDOpc - ISD::AND][0]; |
| RC = &AArch64::GPR32RegClass; |
| break; |
| case MVT::i64: |
| Opc = OpcTable[ISDOpc - ISD::AND][1]; |
| RC = &AArch64::GPR64RegClass; |
| break; |
| } |
| unsigned ResultReg = |
| fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
| AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); |
| if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
| uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
| } |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
| uint64_t Imm) { |
| return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); |
| } |
| |
| unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, |
| bool WantZExt, MachineMemOperand *MMO) { |
| if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
| return 0; |
| |
| // Simplify this down to something we can handle. |
| if (!simplifyAddress(Addr, VT)) |
| return 0; |
| |
| unsigned ScaleFactor = getImplicitScaleFactor(VT); |
| if (!ScaleFactor) |
| llvm_unreachable("Unexpected value type."); |
| |
| // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
| // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
| bool UseScaled = true; |
| if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
| UseScaled = false; |
| ScaleFactor = 1; |
| } |
| |
| static const unsigned GPOpcTable[2][8][4] = { |
| // Sign-extend. |
| { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, |
| AArch64::LDURXi }, |
| { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, |
| AArch64::LDURXi }, |
| { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, |
| AArch64::LDRXroW }, |
| { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, |
| AArch64::LDRXroW } |
| }, |
| // Zero-extend. |
| { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
| AArch64::LDURXi }, |
| { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
| AArch64::LDURXi }, |
| { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
| AArch64::LDRXroW }, |
| { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
| AArch64::LDRXroW } |
| } |
| }; |
| |
| static const unsigned FPOpcTable[4][2] = { |
| { AArch64::LDURSi, AArch64::LDURDi }, |
| { AArch64::LDRSui, AArch64::LDRDui }, |
| { AArch64::LDRSroX, AArch64::LDRDroX }, |
| { AArch64::LDRSroW, AArch64::LDRDroW } |
| }; |
| |
| unsigned Opc; |
| const TargetRegisterClass *RC; |
| bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
| Addr.getOffsetReg(); |
| unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
| if (Addr.getExtendType() == AArch64_AM::UXTW || |
| Addr.getExtendType() == AArch64_AM::SXTW) |
| Idx++; |
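| // Idx selects the addressing form: 0 = unscaled 9-bit offset (LDUR*), |
| // 1 = scaled 12-bit unsigned offset (LDR*ui), 2 = register offset (LDR*roX), |
| // 3 = extended 32-bit register offset (LDR*roW). The GPOpcTable row is |
| // 2 * Idx + IsRet64Bit, picking the 32- or 64-bit destination form where the |
| // two differ. |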
| |
| bool IsRet64Bit = RetVT == MVT::i64; |
| switch (VT.SimpleTy) { |
| default: |
| llvm_unreachable("Unexpected value type."); |
| case MVT::i1: // Intentional fall-through. |
| case MVT::i8: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; |
| RC = (IsRet64Bit && !WantZExt) ? |
| &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
| break; |
| case MVT::i16: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; |
| RC = (IsRet64Bit && !WantZExt) ? |
| &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
| break; |
| case MVT::i32: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; |
| RC = (IsRet64Bit && !WantZExt) ? |
| &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
| break; |
| case MVT::i64: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; |
| RC = &AArch64::GPR64RegClass; |
| break; |
| case MVT::f32: |
| Opc = FPOpcTable[Idx][0]; |
| RC = &AArch64::FPR32RegClass; |
| break; |
| case MVT::f64: |
| Opc = FPOpcTable[Idx][1]; |
| RC = &AArch64::FPR64RegClass; |
| break; |
| } |
| |
| // Create the base instruction, then add the operands. |
| unsigned ResultReg = createResultReg(RC); |
| MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(Opc), ResultReg); |
| addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); |
| |
| // Loading an i1 requires special handling. |
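| // The loaded value is masked down to bit 0, e.g. "and w8, w8, #0x1". |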
| if (VT == MVT::i1) { |
| unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); |
| assert(ANDReg && "Unexpected AND instruction emission failure."); |
| ResultReg = ANDReg; |
| } |
| |
| // For zero-extending loads to 64 bits we emit a 32-bit load and then convert |
| // the 32-bit register into a 64-bit register. |
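| // e.g. an i32 load feeding an i64 zero-extend becomes |
| //   ldr wN, [...]                    (the 32-bit load zeroes the upper half) |
| //   SUBREG_TO_REG xM, 0, wN, sub_32  (reinterpret wN as the full X register) |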
| if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { |
| unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), Reg64) |
| .addImm(0) |
| .addReg(ResultReg, getKillRegState(true)) |
| .addImm(AArch64::sub_32); |
| ResultReg = Reg64; |
| } |
| return ResultReg; |
| } |
| |
| bool AArch64FastISel::selectAddSub(const Instruction *I) { |
| MVT VT; |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| if (VT.isVector()) |
| return selectOperator(I, I->getOpcode()); |
| |
| unsigned ResultReg; |
| switch (I->getOpcode()) { |
| default: |
| llvm_unreachable("Unexpected instruction."); |
| case Instruction::Add: |
| ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| case Instruction::Sub: |
| ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| } |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
| MVT VT; |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| if (VT.isVector()) |
| return selectOperator(I, I->getOpcode()); |
| |
| unsigned ResultReg; |
| switch (I->getOpcode()) { |
| default: |
| llvm_unreachable("Unexpected instruction."); |
| case Instruction::And: |
| ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| case Instruction::Or: |
| ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| case Instruction::Xor: |
| ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| } |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectLoad(const Instruction *I) { |
| MVT VT; |
| // Verify we have a legal type before going any further. Currently, we handle |
| // simple types that will directly fit in a register (i32/f32/i64/f64) or |
| // those that can be sign- or zero-extended to a basic operation (i1/i8/i16). |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || |
| cast<LoadInst>(I)->isAtomic()) |
| return false; |
| |
| const Value *SV = I->getOperand(0); |
| if (TLI.supportSwiftError()) { |
| // Swifterror values can come from either a function parameter with |
| // swifterror attribute or an alloca with swifterror attribute. |
| if (const Argument *Arg = dyn_cast<Argument>(SV)) { |
| if (Arg->hasSwiftErrorAttr()) |
| return false; |
| } |
| |
| if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { |
| if (Alloca->isSwiftError()) |
| return false; |
| } |
| } |
| |
| // See if we can handle this address. |
| Address Addr; |
| if (!computeAddress(I->getOperand(0), Addr, I->getType())) |
| return false; |
| |
| // Fold the following sign-/zero-extend into the load instruction. |
| bool WantZExt = true; |
| MVT RetVT = VT; |
| const Value *IntExtVal = nullptr; |
| if (I->hasOneUse()) { |
| if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { |
| if (isTypeSupported(ZE->getType(), RetVT)) |
| IntExtVal = ZE; |
| else |
| RetVT = VT; |
| } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { |
| if (isTypeSupported(SE->getType(), RetVT)) |
| IntExtVal = SE; |
| else |
| RetVT = VT; |
| WantZExt = false; |
| } |
| } |
| |
| unsigned ResultReg = |
| emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); |
| if (!ResultReg) |
| return false; |
| |
| // There are a few different cases we have to handle, because the load or the |
| // sign-/zero-extend might not be selected by FastISel if we fall-back to |
| // SelectionDAG. There is also an ordering issue when both instructions are in |
| // different basic blocks. |
| // 1.) The load instruction is selected by FastISel, but the integer extend is |
| // not. This usually happens when the integer extend is in a different |
| // basic block and SelectionDAG took over for that basic block. |
| // 2.) The load instruction is selected before the integer extend. This only |
| // happens when the integer extend is in a different basic block. |
| // 3.) The load instruction is selected by SelectionDAG and the integer extend |
| // by FastISel. This happens if there are instructions between the load |
| // and the integer extend that couldn't be selected by FastISel. |
| if (IntExtVal) { |
| // The integer extend hasn't been emitted yet. FastISel or SelectionDAG |
| // could select it. Emit a copy to subreg if necessary. FastISel will remove |
| // it when it selects the integer extend. |
| unsigned Reg = lookUpRegForValue(IntExtVal); |
| auto *MI = MRI.getUniqueVRegDef(Reg); |
| if (!MI) { |
| if (RetVT == MVT::i64 && VT <= MVT::i32) { |
| if (WantZExt) { |
| // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). |
| MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); |
| ResultReg = std::prev(I)->getOperand(0).getReg(); |
| removeDeadCode(I, std::next(I)); |
| } else |
| ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, |
| /*IsKill=*/true, |
| AArch64::sub_32); |
| } |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| // The integer extend has already been emitted - delete all the instructions |
| // that have been emitted by the integer extend lowering code and use the |
| // result from the load instruction directly. |
| while (MI) { |
| Reg = 0; |
| for (auto &Opnd : MI->uses()) { |
| if (Opnd.isReg()) { |
| Reg = Opnd.getReg(); |
| break; |
| } |
| } |
| MachineBasicBlock::iterator I(MI); |
| removeDeadCode(I, std::next(I)); |
| MI = nullptr; |
| if (Reg) |
| MI = MRI.getUniqueVRegDef(Reg); |
| } |
| updateValueMap(IntExtVal, ResultReg); |
| return true; |
| } |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
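| /// Emit a store-release (STLRB/STLRH/STLRW/STLRX) of \p SrcReg to the |
| /// address in \p AddrReg; used for atomic stores with release or stronger |
| /// ordering. |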
| bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, |
| unsigned AddrReg, |
| MachineMemOperand *MMO) { |
| unsigned Opc; |
| switch (VT.SimpleTy) { |
| default: return false; |
| case MVT::i8: Opc = AArch64::STLRB; break; |
| case MVT::i16: Opc = AArch64::STLRH; break; |
| case MVT::i32: Opc = AArch64::STLRW; break; |
| case MVT::i64: Opc = AArch64::STLRX; break; |
| } |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| SrcReg = constrainOperandRegClass(II, SrcReg, 0); |
| AddrReg = constrainOperandRegClass(II, AddrReg, 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
| .addReg(SrcReg) |
| .addReg(AddrReg) |
| .addMemOperand(MMO); |
| return true; |
| } |
| |
| bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, |
| MachineMemOperand *MMO) { |
| if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
| return false; |
| |
| // Simplify this down to something we can handle. |
| if (!simplifyAddress(Addr, VT)) |
| return false; |
| |
| unsigned ScaleFactor = getImplicitScaleFactor(VT); |
| if (!ScaleFactor) |
| llvm_unreachable("Unexpected value type."); |
| |
| // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
| // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
| bool UseScaled = true; |
| if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
| UseScaled = false; |
| ScaleFactor = 1; |
| } |
| |
| static const unsigned OpcTable[4][6] = { |
| { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, |
| AArch64::STURSi, AArch64::STURDi }, |
| { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, |
| AArch64::STRSui, AArch64::STRDui }, |
| { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, |
| AArch64::STRSroX, AArch64::STRDroX }, |
| { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, |
| AArch64::STRSroW, AArch64::STRDroW } |
| }; |
| |
| unsigned Opc; |
| bool VTIsi1 = false; |
| bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
| Addr.getOffsetReg(); |
| unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
| if (Addr.getExtendType() == AArch64_AM::UXTW || |
| Addr.getExtendType() == AArch64_AM::SXTW) |
| Idx++; |
| |
| switch (VT.SimpleTy) { |
| default: llvm_unreachable("Unexpected value type."); |
| case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; |
| case MVT::i8: Opc = OpcTable[Idx][0]; break; |
| case MVT::i16: Opc = OpcTable[Idx][1]; break; |
| case MVT::i32: Opc = OpcTable[Idx][2]; break; |
| case MVT::i64: Opc = OpcTable[Idx][3]; break; |
| case MVT::f32: Opc = OpcTable[Idx][4]; break; |
| case MVT::f64: Opc = OpcTable[Idx][5]; break; |
| } |
| |
| // Storing an i1 requires special handling. |
| if (VTIsi1 && SrcReg != AArch64::WZR) { |
| unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); |
| assert(ANDReg && "Unexpected AND instruction emission failure."); |
| SrcReg = ANDReg; |
| } |
| // Create the base instruction, then add the operands. |
| const MCInstrDesc &II = TII.get(Opc); |
| SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
| MachineInstrBuilder MIB = |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); |
| addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); |
| |
| return true; |
| } |
| |
| bool AArch64FastISel::selectStore(const Instruction *I) { |
| MVT VT; |
| const Value *Op0 = I->getOperand(0); |
| // Verify we have a legal type before going any further. Currently, we handle |
| // simple types that will directly fit in a register (i32/f32/i64/f64) or |
| // those that can be sign- or zero-extended to a basic operation (i1/i8/i16). |
| if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| const Value *PtrV = I->getOperand(1); |
| if (TLI.supportSwiftError()) { |
| // Swifterror values can come from either a function parameter with |
| // swifterror attribute or an alloca with swifterror attribute. |
| if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { |
| if (Arg->hasSwiftErrorAttr()) |
| return false; |
| } |
| |
| if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { |
| if (Alloca->isSwiftError()) |
| return false; |
| } |
| } |
| |
| // Get the value to be stored into a register. Use the zero register directly |
| // when possible to avoid an unnecessary copy and a wasted register. |
| unsigned SrcReg = 0; |
| if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { |
| if (CI->isZero()) |
| SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { |
| if (CF->isZero() && !CF->isNegative()) { |
| VT = MVT::getIntegerVT(VT.getSizeInBits()); |
| SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| } |
| } |
| |
| if (!SrcReg) |
| SrcReg = getRegForValue(Op0); |
| |
| if (!SrcReg) |
| return false; |
| |
| auto *SI = cast<StoreInst>(I); |
| |
| // Try to emit a STLR for seq_cst/release. |
| if (SI->isAtomic()) { |
| AtomicOrdering Ord = SI->getOrdering(); |
| // The non-atomic instructions are sufficient for relaxed stores. |
| if (isReleaseOrStronger(Ord)) { |
| // The STLR addressing mode only supports a base reg; pass that directly. |
| unsigned AddrReg = getRegForValue(PtrV); |
| return emitStoreRelease(VT, SrcReg, AddrReg, |
| createMachineMemOperandFor(I)); |
| } |
| } |
| |
| // See if we can handle this address. |
| Address Addr; |
| if (!computeAddress(PtrV, Addr, Op0->getType())) |
| return false; |
| |
| if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) |
| return false; |
| return true; |
| } |
| |
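| /// Map an IR comparison predicate to the AArch64 condition code expected |
| /// after the corresponding CMP/FCMP. AL is returned for the predicates |
| /// (FCMP_ONE/FCMP_UEQ) that cannot be expressed with a single condition. |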
| static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
| switch (Pred) { |
| case CmpInst::FCMP_ONE: |
| case CmpInst::FCMP_UEQ: |
| default: |
| // AL is our "false" for now. The other two need more compares. |
| return AArch64CC::AL; |
| case CmpInst::ICMP_EQ: |
| case CmpInst::FCMP_OEQ: |
| return AArch64CC::EQ; |
| case CmpInst::ICMP_SGT: |
| case CmpInst::FCMP_OGT: |
| return AArch64CC::GT; |
| case CmpInst::ICMP_SGE: |
| case CmpInst::FCMP_OGE: |
| return AArch64CC::GE; |
| case CmpInst::ICMP_UGT: |
| case CmpInst::FCMP_UGT: |
| return AArch64CC::HI; |
| case CmpInst::FCMP_OLT: |
| return AArch64CC::MI; |
| case CmpInst::ICMP_ULE: |
| case CmpInst::FCMP_OLE: |
| return AArch64CC::LS; |
| case CmpInst::FCMP_ORD: |
| return AArch64CC::VC; |
| case CmpInst::FCMP_UNO: |
| return AArch64CC::VS; |
| case CmpInst::FCMP_UGE: |
| return AArch64CC::PL; |
| case CmpInst::ICMP_SLT: |
| case CmpInst::FCMP_ULT: |
| return AArch64CC::LT; |
| case CmpInst::ICMP_SLE: |
| case CmpInst::FCMP_ULE: |
| return AArch64CC::LE; |
| case CmpInst::FCMP_UNE: |
| case CmpInst::ICMP_NE: |
| return AArch64CC::NE; |
| case CmpInst::ICMP_UGE: |
| return AArch64CC::HS; |
| case CmpInst::ICMP_ULT: |
| return AArch64CC::LO; |
| } |
| } |
| |
| /// Try to emit a combined compare-and-branch instruction. |
| bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { |
| // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions |
| // will not be produced, as they are conditional branch instructions that do |
| // not set flags. |
| if (FuncInfo.MF->getFunction().hasFnAttribute( |
| Attribute::SpeculativeLoadHardening)) |
| return false; |
| |
| assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); |
| const CmpInst *CI = cast<CmpInst>(BI->getCondition()); |
| CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
| |
| const Value *LHS = CI->getOperand(0); |
| const Value *RHS = CI->getOperand(1); |
| |
| MVT VT; |
| if (!isTypeSupported(LHS->getType(), VT)) |
| return false; |
| |
| unsigned BW = VT.getSizeInBits(); |
| if (BW > 64) |
| return false; |
| |
| MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
| MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
| |
| // Try to take advantage of fallthrough opportunities. |
| if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
| std::swap(TBB, FBB); |
| Predicate = CmpInst::getInversePredicate(Predicate); |
| } |
| |
| int TestBit = -1; |
| bool IsCmpNE; |
| switch (Predicate) { |
| default: |
| return false; |
| case CmpInst::ICMP_EQ: |
| case CmpInst::ICMP_NE: |
| if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) |
| std::swap(LHS, RHS); |
| |
| if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) |
| return false; |
| |
| if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) |
| if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { |
| const Value *AndLHS = AI->getOperand(0); |
| const Value *AndRHS = AI->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(AndLHS, AndRHS); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) |
| if (C->getValue().isPowerOf2()) { |
| TestBit = C->getValue().logBase2(); |
| LHS = AndLHS; |
| } |
| } |
| |
| if (VT == MVT::i1) |
| TestBit = 0; |
| |
| IsCmpNE = Predicate == CmpInst::ICMP_NE; |
| break; |
| case CmpInst::ICMP_SLT: |
| case CmpInst::ICMP_SGE: |
| if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) |
| return false; |
| |
| TestBit = BW - 1; |
| IsCmpNE = Predicate == CmpInst::ICMP_SLT; |
| break; |
| case CmpInst::ICMP_SGT: |
| case CmpInst::ICMP_SLE: |
| if (!isa<ConstantInt>(RHS)) |
| return false; |
| |
| if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) |
| return false; |
| |
| TestBit = BW - 1; |
| IsCmpNE = Predicate == CmpInst::ICMP_SLE; |
| break; |
| } // end switch |
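| // For example (illustrative): "icmp slt i64 %x, 0" tests only the sign bit |
| // and becomes "tbnz xN, #63, <bb>", while a branch on "(x & 4) == 0" becomes |
| // "tbz wN, #2, <bb>". |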
| |
| static const unsigned OpcTable[2][2][2] = { |
| { {AArch64::CBZW, AArch64::CBZX }, |
| {AArch64::CBNZW, AArch64::CBNZX} }, |
| { {AArch64::TBZW, AArch64::TBZX }, |
| {AArch64::TBNZW, AArch64::TBNZX} } |
| }; |
| |
| bool IsBitTest = TestBit != -1; |
| bool Is64Bit = BW == 64; |
| if (TestBit < 32 && TestBit >= 0) |
| Is64Bit = false; |
| |
| unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; |
| const MCInstrDesc &II = TII.get(Opc); |
| |
| unsigned SrcReg = getRegForValue(LHS); |
| if (!SrcReg) |
| return false; |
| bool SrcIsKill = hasTrivialKill(LHS); |
| |
| if (BW == 64 && !Is64Bit) |
| SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, |
| AArch64::sub_32); |
| |
| if ((BW < 32) && !IsBitTest) |
| SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); |
| |
| // Emit the combined compare and branch instruction. |
| SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
| MachineInstrBuilder MIB = |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
| .addReg(SrcReg, getKillRegState(SrcIsKill)); |
| if (IsBitTest) |
| MIB.addImm(TestBit); |
| MIB.addMBB(TBB); |
| |
| finishCondBranch(BI->getParent(), TBB, FBB); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectBranch(const Instruction *I) { |
| const BranchInst *BI = cast<BranchInst>(I); |
| if (BI->isUnconditional()) { |
| MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
| fastEmitBranch(MSucc, BI->getDebugLoc()); |
| return true; |
| } |
| |
| MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
| MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
| |
| if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { |
| if (CI->hasOneUse() && isValueAvailable(CI)) { |
| // Try to optimize or fold the cmp. |
| CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_FALSE: |
| fastEmitBranch(FBB, DbgLoc); |
| return true; |
| case CmpInst::FCMP_TRUE: |
| fastEmitBranch(TBB, DbgLoc); |
| return true; |
| } |
| |
| // Try to emit a combined compare-and-branch first. |
| if (emitCompareAndBranch(BI)) |
| return true; |
| |
| // Try to take advantage of fallthrough opportunities. |
| if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
| std::swap(TBB, FBB); |
| Predicate = CmpInst::getInversePredicate(Predicate); |
| } |
| |
| // Emit the cmp. |
| if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) |
| return false; |
| |
| // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
| // instruction. |
| AArch64CC::CondCode CC = getCompareCC(Predicate); |
| AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_UEQ: |
| ExtraCC = AArch64CC::EQ; |
| CC = AArch64CC::VS; |
| break; |
| case CmpInst::FCMP_ONE: |
| ExtraCC = AArch64CC::MI; |
| CC = AArch64CC::GT; |
| break; |
| } |
| assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
| |
| // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
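| // e.g. FCMP_UEQ is lowered as "b.eq <TBB>" followed by "b.vs <TBB>". |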
| if (ExtraCC != AArch64CC::AL) { |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
| .addImm(ExtraCC) |
| .addMBB(TBB); |
| } |
| |
| // Emit the branch. |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
| .addImm(CC) |
| .addMBB(TBB); |
| |
| finishCondBranch(BI->getParent(), TBB, FBB); |
| return true; |
| } |
| } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { |
| uint64_t Imm = CI->getZExtValue(); |
| MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) |
| .addMBB(Target); |
| |
| // Obtain the branch probability and add the target to the successor list. |
| if (FuncInfo.BPI) { |
| auto BranchProbability = FuncInfo.BPI->getEdgeProbability( |
| BI->getParent(), Target->getBasicBlock()); |
| FuncInfo.MBB->addSuccessor(Target, BranchProbability); |
| } else |
| FuncInfo.MBB->addSuccessorWithoutProb(Target); |
| return true; |
| } else { |
| AArch64CC::CondCode CC = AArch64CC::NE; |
| if (foldXALUIntrinsic(CC, I, BI->getCondition())) { |
| // Fake-request the condition; otherwise the intrinsic might be completely |
| // optimized away. |
| unsigned CondReg = getRegForValue(BI->getCondition()); |
| if (!CondReg) |
| return false; |
| |
| // Emit the branch. |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
| .addImm(CC) |
| .addMBB(TBB); |
| |
| finishCondBranch(BI->getParent(), TBB, FBB); |
| return true; |
| } |
| } |
| |
| unsigned CondReg = getRegForValue(BI->getCondition()); |
| if (CondReg == 0) |
| return false; |
| bool CondRegIsKill = hasTrivialKill(BI->getCondition()); |
| |
| // i1 conditions come as i32 values, test the lowest bit with tb(n)z. |
| unsigned Opcode = AArch64::TBNZW; |
| if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
| std::swap(TBB, FBB); |
| Opcode = AArch64::TBZW; |
| } |
| |
| const MCInstrDesc &II = TII.get(Opcode); |
| unsigned ConstrainedCondReg |
| = constrainOperandRegClass(II, CondReg, II.getNumDefs()); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
| .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) |
| .addImm(0) |
| .addMBB(TBB); |
| |
| finishCondBranch(BI->getParent(), TBB, FBB); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectIndirectBr(const Instruction *I) { |
| const IndirectBrInst *BI = cast<IndirectBrInst>(I); |
| unsigned AddrReg = getRegForValue(BI->getOperand(0)); |
| if (AddrReg == 0) |
| return false; |
| |
| // Emit the indirect branch. |
| const MCInstrDesc &II = TII.get(AArch64::BR); |
| AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); |
| |
| // Make sure the CFG is up-to-date. |
| for (auto *Succ : BI->successors()) |
| FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); |
| |
| return true; |
| } |
| |
| bool AArch64FastISel::selectCmp(const Instruction *I) { |
| const CmpInst *CI = cast<CmpInst>(I); |
| |
| // Vectors of i1 are weird: bail out. |
| if (CI->getType()->isVectorTy()) |
| return false; |
| |
| // Try to optimize or fold the cmp. |
| CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
| unsigned ResultReg = 0; |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_FALSE: |
| ResultReg = createResultReg(&AArch64::GPR32RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(AArch64::WZR, getKillRegState(true)); |
| break; |
| case CmpInst::FCMP_TRUE: |
| ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); |
| break; |
| } |
| |
| if (ResultReg) { |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| // Emit the cmp. |
| if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) |
| return false; |
| |
| ResultReg = createResultReg(&AArch64::GPR32RegClass); |
| |
| // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These |
| // condition codes are inverted, because they are used by CSINC. |
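| // CSINC Wd, WZR, WZR, cc materializes !cc, so e.g. FCMP_UEQ becomes |
| //   csinc wT, wzr, wzr, ne   ; wT = 1 iff equal |
| //   csinc wD, wT, wzr, vc    ; wD = wT if ordered, else 1 => EQ || unordered |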
| static unsigned CondCodeTable[2][2] = { |
| { AArch64CC::NE, AArch64CC::VC }, |
| { AArch64CC::PL, AArch64CC::LE } |
| }; |
| unsigned *CondCodes = nullptr; |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_UEQ: |
| CondCodes = &CondCodeTable[0][0]; |
| break; |
| case CmpInst::FCMP_ONE: |
| CondCodes = &CondCodeTable[1][0]; |
| break; |
| } |
| |
| if (CondCodes) { |
| unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
| TmpReg1) |
| .addReg(AArch64::WZR, getKillRegState(true)) |
| .addReg(AArch64::WZR, getKillRegState(true)) |
| .addImm(CondCodes[0]); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
| ResultReg) |
| .addReg(TmpReg1, getKillRegState(true)) |
| .addReg(AArch64::WZR, getKillRegState(true)) |
| .addImm(CondCodes[1]); |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| // Now set a register based on the comparison. |
| AArch64CC::CondCode CC = getCompareCC(Predicate); |
| assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
| AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
| ResultReg) |
| .addReg(AArch64::WZR, getKillRegState(true)) |
| .addReg(AArch64::WZR, getKillRegState(true)) |
| .addImm(invertedCC); |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| /// Optimize selects of i1 if one of the operands is a constant 'true' or |
| /// 'false' value. |
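| /// For example, "select i1 %c, i1 true, i1 %f" becomes an ORR of %c and %f, |
| /// and "select i1 %c, i1 %t, i1 false" becomes an AND of %c and %t. |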
| bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { |
| if (!SI->getType()->isIntegerTy(1)) |
| return false; |
| |
| const Value *Src1Val, *Src2Val; |
| unsigned Opc = 0; |
| bool NeedExtraOp = false; |
| if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { |
| if (CI->isOne()) { |
| Src1Val = SI->getCondition(); |
| Src2Val = SI->getFalseValue(); |
| Opc = AArch64::ORRWrr; |
| } else { |
| assert(CI->isZero()); |
| Src1Val = SI->getFalseValue(); |
| Src2Val = SI->getCondition(); |
| Opc = AArch64::BICWrr; |
| } |
| } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { |
| if (CI->isOne()) { |
| Src1Val = SI->getCondition(); |
| Src2Val = SI->getTrueValue(); |
| Opc = AArch64::ORRWrr; |
| NeedExtraOp = true; |
| } else { |
| assert(CI->isZero()); |
| Src1Val = SI->getCondition(); |
| Src2Val = SI->getTrueValue(); |
| Opc = AArch64::ANDWrr; |
| } |
| } |
| |
| if (!Opc) |
| return false; |
| |
| unsigned Src1Reg = getRegForValue(Src1Val); |
| if (!Src1Reg) |
| return false; |
| bool Src1IsKill = hasTrivialKill(Src1Val); |
| |
| unsigned Src2Reg = getRegForValue(Src2Val); |
| if (!Src2Reg) |
| return false; |
| bool Src2IsKill = hasTrivialKill(Src2Val); |
| |
| if (NeedExtraOp) { |
| Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); |
| Src1IsKill = true; |
| } |
| unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, |
| Src1IsKill, Src2Reg, Src2IsKill); |
| updateValueMap(SI, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectSelect(const Instruction *I) { |
| assert(isa<SelectInst>(I) && "Expected a select instruction."); |
| MVT VT; |
| if (!isTypeSupported(I->getType(), VT)) |
| return false; |
| |
| unsigned Opc; |
| const TargetRegisterClass *RC; |
| switch (VT.SimpleTy) { |
| default: |
| return false; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| Opc = AArch64::CSELWr; |
| RC = &AArch64::GPR32RegClass; |
| break; |
| case MVT::i64: |
| Opc = AArch64::CSELXr; |
| RC = &AArch64::GPR64RegClass; |
| break; |
| case MVT::f32: |
| Opc = AArch64::FCSELSrrr; |
| RC = &AArch64::FPR32RegClass; |
| break; |
| case MVT::f64: |
| Opc = AArch64::FCSELDrrr; |
| RC = &AArch64::FPR64RegClass; |
| break; |
| } |
| |
| const SelectInst *SI = cast<SelectInst>(I); |
| const Value *Cond = SI->getCondition(); |
| AArch64CC::CondCode CC = AArch64CC::NE; |
| AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
| |
| if (optimizeSelect(SI)) |
| return true; |
| |
| // Try to pick up the flags, so we don't have to emit another compare. |
| if (foldXALUIntrinsic(CC, I, Cond)) { |
| // Fake-request the condition to force emission of the XALU intrinsic. |
| unsigned CondReg = getRegForValue(Cond); |
| if (!CondReg) |
| return false; |
| } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && |
| isValueAvailable(Cond)) { |
| const auto *Cmp = cast<CmpInst>(Cond); |
| // Try to optimize or fold the cmp. |
| CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); |
| const Value *FoldSelect = nullptr; |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_FALSE: |
| FoldSelect = SI->getFalseValue(); |
| break; |
| case CmpInst::FCMP_TRUE: |
| FoldSelect = SI->getTrueValue(); |
| break; |
| } |
| |
| if (FoldSelect) { |
| unsigned SrcReg = getRegForValue(FoldSelect); |
| if (!SrcReg) |
| return false; |
| unsigned UseReg = lookUpRegForValue(SI); |
| if (UseReg) |
| MRI.clearKillFlags(UseReg); |
| |
| updateValueMap(I, SrcReg); |
| return true; |
| } |
| |
| // Emit the cmp. |
| if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) |
| return false; |
| |
| // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. |
| CC = getCompareCC(Predicate); |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_UEQ: |
| ExtraCC = AArch64CC::EQ; |
| CC = AArch64CC::VS; |
| break; |
| case CmpInst::FCMP_ONE: |
| ExtraCC = AArch64CC::MI; |
| CC = AArch64CC::GT; |
| break; |
| } |
| assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
| } else { |
| unsigned CondReg = getRegForValue(Cond); |
| if (!CondReg) |
| return false; |
| bool CondIsKill = hasTrivialKill(Cond); |
| |
| const MCInstrDesc &II = TII.get(AArch64::ANDSWri); |
| CondReg = constrainOperandRegClass(II, CondReg, 1); |
| |
| // Emit a TST instruction (ANDS wzr, reg, #imm). |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, |
| AArch64::WZR) |
| .addReg(CondReg, getKillRegState(CondIsKill)) |
| .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); |
| } |
| |
| unsigned Src1Reg = getRegForValue(SI->getTrueValue()); |
| bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); |
| |
| unsigned Src2Reg = getRegForValue(SI->getFalseValue()); |
| bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); |
| |
| if (!Src1Reg || !Src2Reg) |
| return false; |
| |
| if (ExtraCC != AArch64CC::AL) { |
| Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, |
| Src2IsKill, ExtraCC); |
| Src2IsKill = true; |
| } |
| unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, |
| Src2IsKill, CC); |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectFPExt(const Instruction *I) { |
| Value *V = I->getOperand(0); |
| if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) |
| return false; |
| |
| unsigned Op = getRegForValue(V); |
| if (Op == 0) |
| return false; |
| |
| unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), |
| ResultReg).addReg(Op); |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectFPTrunc(const Instruction *I) { |
| Value *V = I->getOperand(0); |
| if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) |
| return false; |
| |
| unsigned Op = getRegForValue(V); |
| if (Op == 0) |
| return false; |
| |
| unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), |
| ResultReg).addReg(Op); |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| // FPToUI and FPToSI |
| bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { |
| MVT DestVT; |
| if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) |
| return false; |
| |
| unsigned SrcReg = getRegForValue(I->getOperand(0)); |
| if (SrcReg == 0) |
| return false; |
| |
| EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); |
| if (SrcVT == MVT::f128 || SrcVT == MVT::f16) |
| return false; |
| |
| unsigned Opc; |
| if (SrcVT == MVT::f64) { |
| if (Signed) |
| Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; |
| else |
| Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; |
| } else { |
| if (Signed) |
| Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; |
| else |
| Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; |
| } |
| unsigned ResultReg = createResultReg( |
| DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
| .addReg(SrcReg); |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { |
| MVT DestVT; |
| if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) |
| return false; |
| // Let regular ISEL handle FP16 |
| if (DestVT == MVT::f16) |
| return false; |
| |
| assert((DestVT == MVT::f32 || DestVT == MVT::f64) && |
| "Unexpected value type."); |
| |
| unsigned SrcReg = getRegForValue(I->getOperand(0)); |
| if (!SrcReg) |
| return false; |
| bool SrcIsKill = hasTrivialKill(I->getOperand(0)); |
| |
| EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); |
| |
| // Handle sign-extension. |
| if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { |
| SrcReg = |
| emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); |
| if (!SrcReg) |
| return false; |
| SrcIsKill = true; |
| } |
| |
| unsigned Opc; |
| if (SrcVT == MVT::i64) { |
| if (Signed) |
| Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; |
| else |
| Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; |
| } else { |
| if (Signed) |
| Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; |
| else |
| Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; |
| } |
| |
| unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, |
| SrcIsKill); |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::fastLowerArguments() { |
| if (!FuncInfo.CanLowerReturn) |
| return false; |
| |
| const Function *F = FuncInfo.Fn; |
| if (F->isVarArg()) |
| return false; |
| |
| CallingConv::ID CC = F->getCallingConv(); |
| if (CC != CallingConv::C && CC != CallingConv::Swift) |
| return false; |
| |
| if (Subtarget->hasCustomCallingConv()) |
| return false; |
| |
| // Only handle simple cases with up to 8 GPR and 8 FPR arguments each. |
| unsigned GPRCnt = 0; |
| unsigned FPRCnt = 0; |
| for (auto const &Arg : F->args()) { |
| if (Arg.hasAttribute(Attribute::ByVal) || |
| Arg.hasAttribute(Attribute::InReg) || |
| Arg.hasAttribute(Attribute::StructRet) || |
| Arg.hasAttribute(Attribute::SwiftSelf) || |
| Arg.hasAttribute(Attribute::SwiftError) || |
| Arg.hasAttribute(Attribute::Nest)) |
| return false; |
| |
| Type *ArgTy = Arg.getType(); |
| if (ArgTy->isStructTy() || ArgTy->isArrayTy()) |
| return false; |
| |
| EVT ArgVT = TLI.getValueType(DL, ArgTy); |
| if (!ArgVT.isSimple()) |
| return false; |
| |
| MVT VT = ArgVT.getSimpleVT().SimpleTy; |
| if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) |
| return false; |
| |
| if (VT.isVector() && |
| (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) |
| return false; |
| |
| if (VT >= MVT::i1 && VT <= MVT::i64) |
| ++GPRCnt; |
| else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || |
| VT.is128BitVector()) |
| ++FPRCnt; |
| else |
| return false; |
| |
| if (GPRCnt > 8 || FPRCnt > 8) |
| return false; |
| } |
| |
| static const MCPhysReg Registers[6][8] = { |
| { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, |
| AArch64::W5, AArch64::W6, AArch64::W7 }, |
| { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, |
| AArch64::X5, AArch64::X6, AArch64::X7 }, |
| { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, |
| AArch64::H5, AArch64::H6, AArch64::H7 }, |
| { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, |
| AArch64::S5, AArch64::S6, AArch64::S7 }, |
| { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, |
| AArch64::D5, AArch64::D6, AArch64::D7 }, |
| { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, |
| AArch64::Q5, AArch64::Q6, AArch64::Q7 } |
| }; |
| |
| unsigned GPRIdx = 0; |
| unsigned FPRIdx = 0; |
| for (auto const &Arg : F->args()) { |
| MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); |
| unsigned SrcReg; |
| const TargetRegisterClass *RC; |
| if (VT >= MVT::i1 && VT <= MVT::i32) { |
| SrcReg = Registers[0][GPRIdx++]; |
| RC = &AArch64::GPR32RegClass; |
| VT = MVT::i32; |
| } else if (VT == MVT::i64) { |
| SrcReg = Registers[1][GPRIdx++]; |
| RC = &AArch64::GPR64RegClass; |
| } else if (VT == MVT::f16) { |
| SrcReg = Registers[2][FPRIdx++]; |
| RC = &AArch64::FPR16RegClass; |
| } else if (VT == MVT::f32) { |
| SrcReg = Registers[3][FPRIdx++]; |
| RC = &AArch64::FPR32RegClass; |
| } else if ((VT == MVT::f64) || VT.is64BitVector()) { |
| SrcReg = Registers[4][FPRIdx++]; |
| RC = &AArch64::FPR64RegClass; |
| } else if (VT.is128BitVector()) { |
| SrcReg = Registers[5][FPRIdx++]; |
| RC = &AArch64::FPR128RegClass; |
| } else |
| llvm_unreachable("Unexpected value type."); |
| |
| unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); |
| // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. |
| // Without this, EmitLiveInCopies may eliminate the livein if its only |
| // use is a bitcast (which isn't turned into an instruction). |
| unsigned ResultReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(DstReg, getKillRegState(true)); |
| updateValueMap(&Arg, ResultReg); |
| } |
| return true; |
| } |
| |
| bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, |
| SmallVectorImpl<MVT> &OutVTs, |
| unsigned &NumBytes) { |
| CallingConv::ID CC = CLI.CallConv; |
| SmallVector<CCValAssign, 16> ArgLocs; |
| CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); |
| CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); |
| |
| // Get a count of how many bytes are to be pushed on the stack. |
| NumBytes = CCInfo.getNextStackOffset(); |
| |
| // Issue CALLSEQ_START |
| unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) |
| .addImm(NumBytes).addImm(0); |
| |
| // Process the args. |
| for (CCValAssign &VA : ArgLocs) { |
| const Value *ArgVal = CLI.OutVals[VA.getValNo()]; |
| MVT ArgVT = OutVTs[VA.getValNo()]; |
| |
| unsigned ArgReg = getRegForValue(ArgVal); |
| if (!ArgReg) |
| return false; |
| |
| // Handle arg promotion: SExt, ZExt, AExt. |
| switch (VA.getLocInfo()) { |
| case CCValAssign::Full: |
| break; |
| case CCValAssign::SExt: { |
| MVT DestVT = VA.getLocVT(); |
| MVT SrcVT = ArgVT; |
| ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); |
| if (!ArgReg) |
| return false; |
| break; |
| } |
| case CCValAssign::AExt: |
| // Intentional fall-through. |
| case CCValAssign::ZExt: { |
| MVT DestVT = VA.getLocVT(); |
| MVT SrcVT = ArgVT; |
| ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); |
| if (!ArgReg) |
| return false; |
| break; |
| } |
| default: |
| llvm_unreachable("Unknown arg promotion!"); |
| } |
| |
| // Now copy/store arg to correct locations. |
| if (VA.isRegLoc() && !VA.needsCustom()) { |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); |
| CLI.OutRegs.push_back(VA.getLocReg()); |
| } else if (VA.needsCustom()) { |
| // FIXME: Handle custom args. |
| return false; |
| } else { |
| assert(VA.isMemLoc() && "Assuming store on stack."); |
| |
| // Don't emit stores for undef values. |
| if (isa<UndefValue>(ArgVal)) |
| continue; |
| |
| // Need to store on the stack. |
| unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; |
| |
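| // On big-endian targets, keep a sub-8-byte argument in the least significant |
| // bytes of its 8-byte stack slot by biasing the offset with the unused bytes. |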
| unsigned BEAlign = 0; |
| if (ArgSize < 8 && !Subtarget->isLittleEndian()) |
| BEAlign = 8 - ArgSize; |
| |
| Address Addr; |
| Addr.setKind(Address::RegBase); |
| Addr.setReg(AArch64::SP); |
| Addr.setOffset(VA.getLocMemOffset() + BEAlign); |
| |
| unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); |
| MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( |
| MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), |
| MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); |
| |
| if (!emitStore(ArgVT, ArgReg, Addr, MMO)) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, |
| unsigned NumBytes) { |
| CallingConv::ID CC = CLI.CallConv; |
| |
| // Issue CALLSEQ_END |
| unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) |
| .addImm(NumBytes).addImm(0); |
| |
| // Now the return value. |
| if (RetVT != MVT::isVoid) { |
| SmallVector<CCValAssign, 16> RVLocs; |
| CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); |
| CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); |
| |
| // Only handle a single return value. |
| if (RVLocs.size() != 1) |
| return false; |
| |
| // Copy all of the result registers out of their specified physreg. |
| MVT CopyVT = RVLocs[0].getValVT(); |
| |
| // TODO: Handle big-endian results |
| if (CopyVT.isVector() && !Subtarget->isLittleEndian()) |
| return false; |
| |
| unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(RVLocs[0].getLocReg()); |
| CLI.InRegs.push_back(RVLocs[0].getLocReg()); |
| |
| CLI.ResultReg = ResultReg; |
| CLI.NumResultRegs = 1; |
| } |
| |
| return true; |
| } |
| |
| bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { |
| CallingConv::ID CC = CLI.CallConv; |
| bool IsTailCall = CLI.IsTailCall; |
| bool IsVarArg = CLI.IsVarArg; |
| const Value *Callee = CLI.Callee; |
| MCSymbol *Symbol = CLI.Symbol; |
| |
| if (!Callee && !Symbol) |
| return false; |
| |
| // Allow SelectionDAG isel to handle tail calls. |
| if (IsTailCall) |
| return false; |
| |
| // FIXME: we could and should support this, but for now correctness at -O0 is |
| // more important. |
| if (Subtarget->isTargetILP32()) |
| return false; |
| |
| CodeModel::Model CM = TM.getCodeModel(); |
| // Only support the small-addressing and large code models. |
| if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) |
| return false; |
| |
| // FIXME: Add large code model support for ELF. |
| if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) |
| return false; |
| |
| // Let SDISel handle vararg functions. |
| if (IsVarArg) |
| return false; |
| |
| // FIXME: Only handle *simple* calls for now. |
| MVT RetVT; |
| if (CLI.RetTy->isVoidTy()) |
| RetVT = MVT::isVoid; |
| else if (!isTypeLegal(CLI.RetTy, RetVT)) |
| return false; |
| |
| for (auto Flag : CLI.OutFlags) |
| if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || |
| Flag.isSwiftSelf() || Flag.isSwiftError()) |
| return false; |
| |
| // Set up the argument vectors. |
| SmallVector<MVT, 16> OutVTs; |
| OutVTs.reserve(CLI.OutVals.size()); |
| |
| for (auto *Val : CLI.OutVals) { |
| MVT VT; |
| if (!isTypeLegal(Val->getType(), VT) && |
| !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) |
| return false; |
| |
| // We don't handle vector parameters yet. |
| if (VT.isVector() || VT.getSizeInBits() > 64) |
| return false; |
| |
| OutVTs.push_back(VT); |
| } |
| |
| Address Addr; |
| if (Callee && !computeCallAddress(Callee, Addr)) |
| return false; |
| |
| // The weak function target may be zero; in that case we must use indirect |
| // addressing via a stub on Windows, as it may be out of range for a |
| // PC-relative jump. |
| if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && |
| Addr.getGlobalValue()->hasExternalWeakLinkage()) |
| return false; |
| |
| // Handle the arguments now that we've gotten them. |
| unsigned NumBytes; |
| if (!processCallArgs(CLI, OutVTs, NumBytes)) |
| return false; |
| |
| const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
| if (RegInfo->isAnyArgRegReserved(*MF)) |
| RegInfo->emitReservedArgRegCallError(*MF); |
| |
| // Issue the call. |
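| // With small addressing this is a direct "bl <callee>" (or "blr xN" for an |
| // indirect call); under the large code model the callee address is first |
| // materialized, e.g. via ADRP + LDRXui from the GOT for a symbol, and then |
| // called with "blr". |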
| MachineInstrBuilder MIB; |
| if (Subtarget->useSmallAddressing()) { |
| const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); |
| MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); |
| if (Symbol) |
| MIB.addSym(Symbol, 0); |
| else if (Addr.getGlobalValue()) |
| MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); |
| else if (Addr.getReg()) { |
| unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); |
| MIB.addReg(Reg); |
| } else |
| return false; |
| } else { |
| unsigned CallReg = 0; |
| if (Symbol) { |
| unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
| ADRPReg) |
| .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); |
| |
| CallReg = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::LDRXui), CallReg) |
| .addReg(ADRPReg) |
| .addSym(Symbol, |
| AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
| } else if (Addr.getGlobalValue()) |
| CallReg = materializeGV(Addr.getGlobalValue()); |
| else if (Addr.getReg()) |
| CallReg = Addr.getReg(); |
| |
| if (!CallReg) |
| return false; |
| |
| const MCInstrDesc &II = TII.get(AArch64::BLR); |
| CallReg = constrainOperandRegClass(II, CallReg, 0); |
| MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); |
| } |
| |
| // Add implicit physical register uses to the call. |
| for (auto Reg : CLI.OutRegs) |
| MIB.addReg(Reg, RegState::Implicit); |
| |
| // Add a register mask with the call-preserved registers. |
| // Proper defs for return values will be added by setPhysRegsDeadExcept(). |
| MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); |
| |
| CLI.Call = MIB; |
| |
| // Finish off the call including any return values. |
| return finishCall(CLI, RetVT, NumBytes); |
| } |
| |
| bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { |
| if (Alignment) |
| return Len / Alignment <= 4; |
| else |
| return Len < 32; |
| } |
| |
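| /// Inline a small memcpy as a short sequence of loads and stores; e.g. a |
| /// 16-byte copy with 8-byte alignment becomes two 64-bit load/store pairs. |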
| bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, |
| uint64_t Len, unsigned Alignment) { |
| // Make sure we don't bloat code by inlining very large memcpy's. |
| if (!isMemCpySmall(Len, Alignment)) |
| return false; |
| |
| int64_t UnscaledOffset = 0; |
| Address OrigDest = Dest; |
| Address OrigSrc = Src; |
| |
| while (Len) { |
| MVT VT; |
| if (!Alignment || Alignment >= 8) { |
| if (Len >= 8) |
| VT = MVT::i64; |
| else if (Len >= 4) |
| VT = MVT::i32; |
| else if (Len >= 2) |
| VT = MVT::i16; |
| else { |
| VT = MVT::i8; |
| } |
| } else { |
| // Bound based on alignment. |
| if (Len >= 4 && Alignment == 4) |
| VT = MVT::i32; |
| else if (Len >= 2 && Alignment == 2) |
| VT = MVT::i16; |
| else { |
| VT = MVT::i8; |
| } |
| } |
| |
| unsigned ResultReg = emitLoad(VT, VT, Src); |
| if (!ResultReg) |
| return false; |
| |
| if (!emitStore(VT, ResultReg, Dest)) |
| return false; |
| |
| int64_t Size = VT.getSizeInBits() / 8; |
| Len -= Size; |
| UnscaledOffset += Size; |
| |
| // We need to recompute the unscaled offset for each iteration. |
| Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); |
| Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); |
| } |
| |
| return true; |
| } |
| |
| /// Check if it is possible to fold the condition from the XALU intrinsic |
| /// into the user. The condition code will only be updated on success. |
| bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, |
| const Instruction *I, |
| const Value *Cond) { |
| if (!isa<ExtractValueInst>(Cond)) |
| return false; |
| |
| const auto *EV = cast<ExtractValueInst>(Cond); |
| if (!isa<IntrinsicInst>(EV->getAggregateOperand())) |
| return false; |
| |
| const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); |
| MVT RetVT; |
| const Function *Callee = II->getCalledFunction(); |
| Type *RetTy = |
| cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); |
| if (!isTypeLegal(RetTy, RetVT)) |
| return false; |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return false; |
| |
| const Value *LHS = II->getArgOperand(0); |
| const Value *RHS = II->getArgOperand(1); |
| |
| // Canonicalize immediate to the RHS. |
| if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && |
| isCommutativeIntrinsic(II)) |
| std::swap(LHS, RHS); |
| |
| // Simplify multiplies. |
| Intrinsic::ID IID = II->getIntrinsicID(); |
| switch (IID) { |
| default: |
| break; |
| case Intrinsic::smul_with_overflow: |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 2) |
| IID = Intrinsic::sadd_with_overflow; |
| break; |
| case Intrinsic::umul_with_overflow: |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 2) |
| IID = Intrinsic::uadd_with_overflow; |
| break; |
| } |
| |
| AArch64CC::CondCode TmpCC; |
| switch (IID) { |
| default: |
| return false; |
| case Intrinsic::sadd_with_overflow: |
| case Intrinsic::ssub_with_overflow: |
| TmpCC = AArch64CC::VS; |
| break; |
| case Intrinsic::uadd_with_overflow: |
| TmpCC = AArch64CC::HS; |
| break; |
| case Intrinsic::usub_with_overflow: |
| TmpCC = AArch64CC::LO; |
| break; |
| case Intrinsic::smul_with_overflow: |
| case Intrinsic::umul_with_overflow: |
| TmpCC = AArch64CC::NE; |
| break; |
| } |
| |
| // Check if both instructions are in the same basic block. |
| if (!isValueAvailable(II)) |
| return false; |
| |
|   // Make sure nothing is in the way between the intrinsic and the instruction
|   // being selected.
| BasicBlock::const_iterator Start(I); |
| BasicBlock::const_iterator End(II); |
| for (auto Itr = std::prev(Start); Itr != End; --Itr) { |
| // We only expect extractvalue instructions between the intrinsic and the |
| // instruction to be selected. |
| if (!isa<ExtractValueInst>(Itr)) |
| return false; |
| |
| // Check that the extractvalue operand comes from the intrinsic. |
| const auto *EVI = cast<ExtractValueInst>(Itr); |
| if (EVI->getAggregateOperand() != II) |
| return false; |
| } |
| |
| CC = TmpCC; |
| return true; |
| } |
| |
| bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { |
| // FIXME: Handle more intrinsics. |
| switch (II->getIntrinsicID()) { |
| default: return false; |
| case Intrinsic::frameaddress: { |
| MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
| MFI.setFrameAddressIsTaken(true); |
| |
| const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
| Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); |
| Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); |
| // Recursively load frame address |
| // ldr x0, [fp] |
| // ldr x0, [x0] |
| // ldr x0, [x0] |
| // ... |
| unsigned DestReg; |
| unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); |
| while (Depth--) { |
| DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, |
| SrcReg, /*IsKill=*/true, 0); |
| assert(DestReg && "Unexpected LDR instruction emission failure."); |
| SrcReg = DestReg; |
| } |
| |
| updateValueMap(II, SrcReg); |
| return true; |
| } |
| case Intrinsic::sponentry: { |
| MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
| |
| // SP = FP + Fixed Object + 16 |
| int FI = MFI.CreateFixedObject(4, 0, false); |
| unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::ADDXri), ResultReg) |
| .addFrameIndex(FI) |
| .addImm(0) |
| .addImm(0); |
| |
| updateValueMap(II, ResultReg); |
| return true; |
| } |
| case Intrinsic::memcpy: |
| case Intrinsic::memmove: { |
| const auto *MTI = cast<MemTransferInst>(II); |
| // Don't handle volatile. |
| if (MTI->isVolatile()) |
| return false; |
| |
|     // Disable inlining for memmove before the calls to computeAddress below.
|     // Otherwise we would emit dead code, because we don't currently handle
|     // inline expansion of memmove.
| bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); |
| if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { |
| // Small memcpy's are common enough that we want to do them without a call |
| // if possible. |
| uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); |
| unsigned Alignment = MinAlign(MTI->getDestAlignment(), |
| MTI->getSourceAlignment()); |
| if (isMemCpySmall(Len, Alignment)) { |
| Address Dest, Src; |
| if (!computeAddress(MTI->getRawDest(), Dest) || |
| !computeAddress(MTI->getRawSource(), Src)) |
| return false; |
| if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) |
| return true; |
| } |
| } |
| |
| if (!MTI->getLength()->getType()->isIntegerTy(64)) |
| return false; |
| |
| if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) |
| // Fast instruction selection doesn't support the special |
| // address spaces. |
| return false; |
| |
| const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; |
| return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); |
| } |
| case Intrinsic::memset: { |
| const MemSetInst *MSI = cast<MemSetInst>(II); |
| // Don't handle volatile. |
| if (MSI->isVolatile()) |
| return false; |
| |
| if (!MSI->getLength()->getType()->isIntegerTy(64)) |
| return false; |
| |
| if (MSI->getDestAddressSpace() > 255) |
| // Fast instruction selection doesn't support the special |
| // address spaces. |
| return false; |
| |
| return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); |
| } |
| case Intrinsic::sin: |
| case Intrinsic::cos: |
| case Intrinsic::pow: { |
| MVT RetVT; |
| if (!isTypeLegal(II->getType(), RetVT)) |
| return false; |
| |
| if (RetVT != MVT::f32 && RetVT != MVT::f64) |
| return false; |
| |
| static const RTLIB::Libcall LibCallTable[3][2] = { |
| { RTLIB::SIN_F32, RTLIB::SIN_F64 }, |
| { RTLIB::COS_F32, RTLIB::COS_F64 }, |
| { RTLIB::POW_F32, RTLIB::POW_F64 } |
| }; |
| RTLIB::Libcall LC; |
| bool Is64Bit = RetVT == MVT::f64; |
| switch (II->getIntrinsicID()) { |
| default: |
| llvm_unreachable("Unexpected intrinsic."); |
| case Intrinsic::sin: |
| LC = LibCallTable[0][Is64Bit]; |
| break; |
| case Intrinsic::cos: |
| LC = LibCallTable[1][Is64Bit]; |
| break; |
| case Intrinsic::pow: |
| LC = LibCallTable[2][Is64Bit]; |
| break; |
| } |
| |
| ArgListTy Args; |
| Args.reserve(II->getNumArgOperands()); |
| |
| // Populate the argument list. |
| for (auto &Arg : II->arg_operands()) { |
| ArgListEntry Entry; |
| Entry.Val = Arg; |
| Entry.Ty = Arg->getType(); |
| Args.push_back(Entry); |
| } |
| |
| CallLoweringInfo CLI; |
| MCContext &Ctx = MF->getContext(); |
| CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), |
| TLI.getLibcallName(LC), std::move(Args)); |
| if (!lowerCallTo(CLI)) |
| return false; |
| updateValueMap(II, CLI.ResultReg); |
| return true; |
| } |
| case Intrinsic::fabs: { |
| MVT VT; |
| if (!isTypeLegal(II->getType(), VT)) |
| return false; |
| |
| unsigned Opc; |
| switch (VT.SimpleTy) { |
| default: |
| return false; |
| case MVT::f32: |
| Opc = AArch64::FABSSr; |
| break; |
| case MVT::f64: |
| Opc = AArch64::FABSDr; |
| break; |
| } |
| unsigned SrcReg = getRegForValue(II->getOperand(0)); |
| if (!SrcReg) |
| return false; |
| bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); |
| unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
| .addReg(SrcReg, getKillRegState(SrcRegIsKill)); |
| updateValueMap(II, ResultReg); |
| return true; |
| } |
| case Intrinsic::trap: |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) |
| .addImm(1); |
| return true; |
| case Intrinsic::debugtrap: { |
| if (Subtarget->isTargetWindows()) { |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) |
| .addImm(0xF000); |
| return true; |
| } |
| break; |
| } |
| |
| case Intrinsic::sqrt: { |
| Type *RetTy = II->getCalledFunction()->getReturnType(); |
| |
| MVT VT; |
| if (!isTypeLegal(RetTy, VT)) |
| return false; |
| |
| unsigned Op0Reg = getRegForValue(II->getOperand(0)); |
| if (!Op0Reg) |
| return false; |
| bool Op0IsKill = hasTrivialKill(II->getOperand(0)); |
| |
| unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(II, ResultReg); |
| return true; |
| } |
| case Intrinsic::sadd_with_overflow: |
| case Intrinsic::uadd_with_overflow: |
| case Intrinsic::ssub_with_overflow: |
| case Intrinsic::usub_with_overflow: |
| case Intrinsic::smul_with_overflow: |
| case Intrinsic::umul_with_overflow: { |
| // This implements the basic lowering of the xalu with overflow intrinsics. |
| const Function *Callee = II->getCalledFunction(); |
| auto *Ty = cast<StructType>(Callee->getReturnType()); |
| Type *RetTy = Ty->getTypeAtIndex(0U); |
| |
| MVT VT; |
| if (!isTypeLegal(RetTy, VT)) |
| return false; |
| |
| if (VT != MVT::i32 && VT != MVT::i64) |
| return false; |
| |
| const Value *LHS = II->getArgOperand(0); |
| const Value *RHS = II->getArgOperand(1); |
| // Canonicalize immediate to the RHS. |
| if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && |
| isCommutativeIntrinsic(II)) |
| std::swap(LHS, RHS); |
| |
| // Simplify multiplies. |
| Intrinsic::ID IID = II->getIntrinsicID(); |
| switch (IID) { |
| default: |
| break; |
| case Intrinsic::smul_with_overflow: |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 2) { |
| IID = Intrinsic::sadd_with_overflow; |
| RHS = LHS; |
| } |
| break; |
| case Intrinsic::umul_with_overflow: |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 2) { |
| IID = Intrinsic::uadd_with_overflow; |
| RHS = LHS; |
| } |
| break; |
| } |
| |
| unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; |
| AArch64CC::CondCode CC = AArch64CC::Invalid; |
| switch (IID) { |
| default: llvm_unreachable("Unexpected intrinsic!"); |
| case Intrinsic::sadd_with_overflow: |
| ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); |
| CC = AArch64CC::VS; |
| break; |
| case Intrinsic::uadd_with_overflow: |
| ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); |
| CC = AArch64CC::HS; |
| break; |
| case Intrinsic::ssub_with_overflow: |
| ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); |
| CC = AArch64CC::VS; |
| break; |
| case Intrinsic::usub_with_overflow: |
| ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); |
| CC = AArch64CC::LO; |
| break; |
| case Intrinsic::smul_with_overflow: { |
| CC = AArch64CC::NE; |
| unsigned LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return false; |
| bool LHSIsKill = hasTrivialKill(LHS); |
| |
| unsigned RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return false; |
| bool RHSIsKill = hasTrivialKill(RHS); |
| |
| if (VT == MVT::i32) { |
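|         // Compute the full 64-bit product with SMULL. Overflow occurred iff the
|         // high 32 bits are not the sign-extension of the low 32 bits, i.e. the
|         // top half (product lsr #32) differs from (low32 asr #31); the SUBS
|         // below sets NE exactly in that case.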
| MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
| unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, |
| /*IsKill=*/false, 32); |
| MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, |
| AArch64::sub_32); |
| ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, |
| AArch64::sub_32); |
| emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, |
| AArch64_AM::ASR, 31, /*WantResult=*/false); |
| } else { |
| assert(VT == MVT::i64 && "Unexpected value type."); |
| // LHSReg and RHSReg cannot be killed by this Mul, since they are |
| // reused in the next instruction. |
| MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, |
| /*IsKill=*/false); |
| unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, |
| RHSReg, RHSIsKill); |
| emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, |
| AArch64_AM::ASR, 63, /*WantResult=*/false); |
| } |
| break; |
| } |
| case Intrinsic::umul_with_overflow: { |
| CC = AArch64CC::NE; |
| unsigned LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return false; |
| bool LHSIsKill = hasTrivialKill(LHS); |
| |
| unsigned RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return false; |
| bool RHSIsKill = hasTrivialKill(RHS); |
| |
| if (VT == MVT::i32) { |
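|         // Compute the full 64-bit product with UMULL. Overflow occurred iff any
|         // of the high 32 bits are set; the SUBS comparing (product lsr #32)
|         // with zero below sets NE exactly in that case.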
| MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
| emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, |
| /*IsKill=*/false, AArch64_AM::LSR, 32, |
| /*WantResult=*/false); |
| MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, |
| AArch64::sub_32); |
| } else { |
| assert(VT == MVT::i64 && "Unexpected value type."); |
| // LHSReg and RHSReg cannot be killed by this Mul, since they are |
| // reused in the next instruction. |
| MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, |
| /*IsKill=*/false); |
| unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, |
| RHSReg, RHSIsKill); |
| emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, |
| /*IsKill=*/false, /*WantResult=*/false); |
| } |
| break; |
| } |
| } |
| |
| if (MulReg) { |
| ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); |
| } |
| |
| if (!ResultReg1) |
| return false; |
| |
| ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, |
| AArch64::WZR, /*IsKill=*/true, AArch64::WZR, |
| /*IsKill=*/true, getInvertedCondCode(CC)); |
| (void)ResultReg2; |
| assert((ResultReg1 + 1) == ResultReg2 && |
| "Nonconsecutive result registers."); |
| updateValueMap(II, ResultReg1, 2); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool AArch64FastISel::selectRet(const Instruction *I) { |
| const ReturnInst *Ret = cast<ReturnInst>(I); |
| const Function &F = *I->getParent()->getParent(); |
| |
| if (!FuncInfo.CanLowerReturn) |
| return false; |
| |
| if (F.isVarArg()) |
| return false; |
| |
| if (TLI.supportSwiftError() && |
| F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) |
| return false; |
| |
| if (TLI.supportSplitCSR(FuncInfo.MF)) |
| return false; |
| |
| // Build a list of return value registers. |
| SmallVector<unsigned, 4> RetRegs; |
| |
| if (Ret->getNumOperands() > 0) { |
| CallingConv::ID CC = F.getCallingConv(); |
| SmallVector<ISD::OutputArg, 4> Outs; |
| GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); |
| |
| // Analyze operands of the call, assigning locations to each operand. |
| SmallVector<CCValAssign, 16> ValLocs; |
| CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); |
| CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS |
| : RetCC_AArch64_AAPCS; |
| CCInfo.AnalyzeReturn(Outs, RetCC); |
| |
| // Only handle a single return value for now. |
| if (ValLocs.size() != 1) |
| return false; |
| |
| CCValAssign &VA = ValLocs[0]; |
| const Value *RV = Ret->getOperand(0); |
| |
| // Don't bother handling odd stuff for now. |
| if ((VA.getLocInfo() != CCValAssign::Full) && |
| (VA.getLocInfo() != CCValAssign::BCvt)) |
| return false; |
| |
| // Only handle register returns for now. |
| if (!VA.isRegLoc()) |
| return false; |
| |
| unsigned Reg = getRegForValue(RV); |
| if (Reg == 0) |
| return false; |
| |
| unsigned SrcReg = Reg + VA.getValNo(); |
| Register DestReg = VA.getLocReg(); |
| // Avoid a cross-class copy. This is very unlikely. |
| if (!MRI.getRegClass(SrcReg)->contains(DestReg)) |
| return false; |
| |
| EVT RVEVT = TLI.getValueType(DL, RV->getType()); |
| if (!RVEVT.isSimple()) |
| return false; |
| |
| // Vectors (of > 1 lane) in big endian need tricky handling. |
| if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && |
| !Subtarget->isLittleEndian()) |
| return false; |
| |
| MVT RVVT = RVEVT.getSimpleVT(); |
| if (RVVT == MVT::f128) |
| return false; |
| |
| MVT DestVT = VA.getValVT(); |
| // Special handling for extended integers. |
| if (RVVT != DestVT) { |
| if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) |
| return false; |
| |
| if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) |
| return false; |
| |
| bool IsZExt = Outs[0].Flags.isZExt(); |
| SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); |
| if (SrcReg == 0) |
| return false; |
| } |
| |
| // "Callee" (i.e. value producer) zero extends pointers at function |
| // boundary. |
| if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) |
| SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff); |
| |
| // Make the copy. |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); |
| |
| // Add register to return instruction. |
| RetRegs.push_back(VA.getLocReg()); |
| } |
| |
| MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::RET_ReallyLR)); |
| for (unsigned RetReg : RetRegs) |
| MIB.addReg(RetReg, RegState::Implicit); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectTrunc(const Instruction *I) { |
| Type *DestTy = I->getType(); |
| Value *Op = I->getOperand(0); |
| Type *SrcTy = Op->getType(); |
| |
| EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); |
| EVT DestEVT = TLI.getValueType(DL, DestTy, true); |
| if (!SrcEVT.isSimple()) |
| return false; |
| if (!DestEVT.isSimple()) |
| return false; |
| |
| MVT SrcVT = SrcEVT.getSimpleVT(); |
| MVT DestVT = DestEVT.getSimpleVT(); |
| |
| if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && |
| SrcVT != MVT::i8) |
| return false; |
| if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && |
| DestVT != MVT::i1) |
| return false; |
| |
| unsigned SrcReg = getRegForValue(Op); |
| if (!SrcReg) |
| return false; |
| bool SrcIsKill = hasTrivialKill(Op); |
| |
|   // If we're truncating from i64 to a smaller non-legal type then generate an
|   // AND. Otherwise, we know the high bits are undefined and a truncate only
|   // generates a COPY. We cannot also mark the source register as the result
|   // register, because this can incorrectly transfer the kill flag onto the
|   // source register.
| unsigned ResultReg; |
| if (SrcVT == MVT::i64) { |
| uint64_t Mask = 0; |
| switch (DestVT.SimpleTy) { |
| default: |
| // Trunc i64 to i32 is handled by the target-independent fast-isel. |
| return false; |
| case MVT::i1: |
| Mask = 0x1; |
| break; |
| case MVT::i8: |
| Mask = 0xff; |
| break; |
| case MVT::i16: |
| Mask = 0xffff; |
| break; |
| } |
| // Issue an extract_subreg to get the lower 32-bits. |
| unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, |
| AArch64::sub_32); |
| // Create the AND instruction which performs the actual truncation. |
| ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); |
| assert(ResultReg && "Unexpected AND instruction emission failure."); |
| } else { |
| ResultReg = createResultReg(&AArch64::GPR32RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(SrcReg, getKillRegState(SrcIsKill)); |
| } |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { |
| assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || |
| DestVT == MVT::i64) && |
| "Unexpected value type."); |
| // Handle i8 and i16 as i32. |
| if (DestVT == MVT::i8 || DestVT == MVT::i16) |
| DestVT = MVT::i32; |
| |
| if (IsZExt) { |
| unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); |
| assert(ResultReg && "Unexpected AND instruction emission failure."); |
| if (DestVT == MVT::i64) { |
| // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the |
| // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. |
| Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), Reg64) |
| .addImm(0) |
| .addReg(ResultReg) |
| .addImm(AArch64::sub_32); |
| ResultReg = Reg64; |
| } |
| return ResultReg; |
| } else { |
| if (DestVT == MVT::i64) { |
| // FIXME: We're SExt i1 to i64. |
| return 0; |
| } |
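|     // SBFM Wd, Wn, #0, #0 replicates bit 0 across the whole register,
|     // producing either 0 or all-ones.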
| return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, |
| /*TODO:IsKill=*/false, 0, 0); |
| } |
| } |
| |
| unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
| unsigned Op1, bool Op1IsKill) { |
| unsigned Opc, ZReg; |
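|   // There is no plain two-operand MUL at this level: MUL is an alias of MADD
|   // with the zero register as the addend, so emit MADD{W|X}rrr Rd, Rn, Rm, ZR.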
| switch (RetVT.SimpleTy) { |
| default: return 0; |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| RetVT = MVT::i32; |
| Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; |
| case MVT::i64: |
| Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; |
| } |
| |
| const TargetRegisterClass *RC = |
| (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
|   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
|                           ZReg, /*IsKill=*/true);
| } |
| |
| unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
| unsigned Op1, bool Op1IsKill) { |
| if (RetVT != MVT::i64) |
| return 0; |
| |
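|   // SMULL Xd, Wn, Wm is an alias of SMADDL Xd, Wn, Wm, XZR: a widening
|   // 32 x 32 -> 64-bit signed multiply with a zero addend.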
| return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, |
| Op0, Op0IsKill, Op1, Op1IsKill, |
| AArch64::XZR, /*IsKill=*/true); |
| } |
| |
| unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
| unsigned Op1, bool Op1IsKill) { |
| if (RetVT != MVT::i64) |
| return 0; |
| |
| return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, |
| Op0, Op0IsKill, Op1, Op1IsKill, |
| AArch64::XZR, /*IsKill=*/true); |
| } |
| |
| unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
| unsigned Op1Reg, bool Op1IsKill) { |
| unsigned Opc = 0; |
| bool NeedTrunc = false; |
| uint64_t Mask = 0; |
| switch (RetVT.SimpleTy) { |
| default: return 0; |
| case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; |
| case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; |
| case MVT::i32: Opc = AArch64::LSLVWr; break; |
| case MVT::i64: Opc = AArch64::LSLVXr; break; |
| } |
| |
| const TargetRegisterClass *RC = |
| (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| if (NeedTrunc) { |
| Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
| Op1IsKill = true; |
| } |
| unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
| Op1IsKill); |
| if (NeedTrunc) |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
| bool Op0IsKill, uint64_t Shift, |
| bool IsZExt) { |
| assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
| "Unexpected source/return type pair."); |
| assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
| SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
| "Unexpected source value type."); |
| assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
| RetVT == MVT::i64) && "Unexpected return value type."); |
| |
| bool Is64Bit = (RetVT == MVT::i64); |
| unsigned RegSize = Is64Bit ? 64 : 32; |
| unsigned DstBits = RetVT.getSizeInBits(); |
| unsigned SrcBits = SrcVT.getSizeInBits(); |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| |
| // Just emit a copy for "zero" shifts. |
| if (Shift == 0) { |
| if (RetVT == SrcVT) { |
| unsigned ResultReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(Op0, getKillRegState(Op0IsKill)); |
| return ResultReg; |
| } else |
| return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
| } |
| |
| // Don't deal with undefined shifts. |
| if (Shift >= DstBits) |
| return 0; |
| |
| // For immediate shifts we can fold the zero-/sign-extension into the shift. |
| // {S|U}BFM Wd, Wn, #r, #s |
| // Wd<32+s-r,32-r> = Wn<s:0> when r > s |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = shl i16 %1, 4 |
| // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 |
| // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext |
| // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext |
| // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = shl i16 %1, 8 |
| // Wd<32+7-24,32-24> = Wn<7:0> |
| // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext |
| // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext |
| // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = shl i16 %1, 12 |
| // Wd<32+3-20,32-20> = Wn<3:0> |
| // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext |
| // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext |
| // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext |
| |
| unsigned ImmR = RegSize - Shift; |
| // Limit the width to the length of the source type. |
| unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); |
| static const unsigned OpcTable[2][2] = { |
| {AArch64::SBFMWri, AArch64::SBFMXri}, |
| {AArch64::UBFMWri, AArch64::UBFMXri} |
| }; |
| unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
| if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
| Register TmpReg = MRI.createVirtualRegister(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
| .addImm(0) |
| .addReg(Op0, getKillRegState(Op0IsKill)) |
| .addImm(AArch64::sub_32); |
| Op0 = TmpReg; |
| Op0IsKill = true; |
| } |
| return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
| } |
| |
| unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
| unsigned Op1Reg, bool Op1IsKill) { |
| unsigned Opc = 0; |
| bool NeedTrunc = false; |
| uint64_t Mask = 0; |
| switch (RetVT.SimpleTy) { |
| default: return 0; |
| case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; |
| case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; |
| case MVT::i32: Opc = AArch64::LSRVWr; break; |
| case MVT::i64: Opc = AArch64::LSRVXr; break; |
| } |
| |
| const TargetRegisterClass *RC = |
| (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
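|   // i8/i16 are not legal types, so the value and the shift amount live in W
|   // registers with garbage in the upper bits: mask both operands down to the
|   // narrow width first and re-truncate the result with another AND afterwards.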
| if (NeedTrunc) { |
| Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); |
| Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
| Op0IsKill = Op1IsKill = true; |
| } |
| unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
| Op1IsKill); |
| if (NeedTrunc) |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
| bool Op0IsKill, uint64_t Shift, |
| bool IsZExt) { |
| assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
| "Unexpected source/return type pair."); |
| assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
| SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
| "Unexpected source value type."); |
| assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
| RetVT == MVT::i64) && "Unexpected return value type."); |
| |
| bool Is64Bit = (RetVT == MVT::i64); |
| unsigned RegSize = Is64Bit ? 64 : 32; |
| unsigned DstBits = RetVT.getSizeInBits(); |
| unsigned SrcBits = SrcVT.getSizeInBits(); |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| |
| // Just emit a copy for "zero" shifts. |
| if (Shift == 0) { |
| if (RetVT == SrcVT) { |
| unsigned ResultReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(Op0, getKillRegState(Op0IsKill)); |
| return ResultReg; |
| } else |
| return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
| } |
| |
| // Don't deal with undefined shifts. |
| if (Shift >= DstBits) |
| return 0; |
| |
| // For immediate shifts we can fold the zero-/sign-extension into the shift. |
| // {S|U}BFM Wd, Wn, #r, #s |
| // Wd<s-r:0> = Wn<s:r> when r <= s |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = lshr i16 %1, 4 |
| // Wd<7-4:0> = Wn<7:4> |
| // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
| // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = lshr i16 %1, 8 |
| // Wd<7-7,0> = Wn<7:7> |
| // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = lshr i16 %1, 12 |
| // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
| // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
| |
| if (Shift >= SrcBits && IsZExt) |
| return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); |
| |
|   // It is not possible to fold a sign-extend into the LShr instruction, so in
|   // that case emit an explicit sign-extend first.
| if (!IsZExt) { |
| Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
| if (!Op0) |
| return 0; |
| Op0IsKill = true; |
| SrcVT = RetVT; |
| SrcBits = SrcVT.getSizeInBits(); |
| IsZExt = true; |
| } |
| |
| unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); |
| unsigned ImmS = SrcBits - 1; |
| static const unsigned OpcTable[2][2] = { |
| {AArch64::SBFMWri, AArch64::SBFMXri}, |
| {AArch64::UBFMWri, AArch64::UBFMXri} |
| }; |
| unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
| if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
| Register TmpReg = MRI.createVirtualRegister(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
| .addImm(0) |
| .addReg(Op0, getKillRegState(Op0IsKill)) |
| .addImm(AArch64::sub_32); |
| Op0 = TmpReg; |
| Op0IsKill = true; |
| } |
| return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
| } |
| |
| unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
| unsigned Op1Reg, bool Op1IsKill) { |
| unsigned Opc = 0; |
| bool NeedTrunc = false; |
| uint64_t Mask = 0; |
| switch (RetVT.SimpleTy) { |
| default: return 0; |
| case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; |
| case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; |
| case MVT::i32: Opc = AArch64::ASRVWr; break; |
| case MVT::i64: Opc = AArch64::ASRVXr; break; |
| } |
| |
| const TargetRegisterClass *RC = |
| (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| if (NeedTrunc) { |
| Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); |
| Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
| Op0IsKill = Op1IsKill = true; |
| } |
| unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
| Op1IsKill); |
| if (NeedTrunc) |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
| bool Op0IsKill, uint64_t Shift, |
| bool IsZExt) { |
| assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
| "Unexpected source/return type pair."); |
| assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
| SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
| "Unexpected source value type."); |
| assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
| RetVT == MVT::i64) && "Unexpected return value type."); |
| |
| bool Is64Bit = (RetVT == MVT::i64); |
| unsigned RegSize = Is64Bit ? 64 : 32; |
| unsigned DstBits = RetVT.getSizeInBits(); |
| unsigned SrcBits = SrcVT.getSizeInBits(); |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| |
| // Just emit a copy for "zero" shifts. |
| if (Shift == 0) { |
| if (RetVT == SrcVT) { |
| unsigned ResultReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(Op0, getKillRegState(Op0IsKill)); |
| return ResultReg; |
| } else |
| return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
| } |
| |
| // Don't deal with undefined shifts. |
| if (Shift >= DstBits) |
| return 0; |
| |
| // For immediate shifts we can fold the zero-/sign-extension into the shift. |
| // {S|U}BFM Wd, Wn, #r, #s |
| // Wd<s-r:0> = Wn<s:r> when r <= s |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = ashr i16 %1, 4 |
| // Wd<7-4:0> = Wn<7:4> |
| // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
| // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = ashr i16 %1, 8 |
| // Wd<7-7,0> = Wn<7:7> |
| // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
| |
| // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
| // %2 = ashr i16 %1, 12 |
| // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
| // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
| // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
| |
| if (Shift >= SrcBits && IsZExt) |
| return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); |
| |
| unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); |
| unsigned ImmS = SrcBits - 1; |
| static const unsigned OpcTable[2][2] = { |
| {AArch64::SBFMWri, AArch64::SBFMXri}, |
| {AArch64::UBFMWri, AArch64::UBFMXri} |
| }; |
| unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
| if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
| Register TmpReg = MRI.createVirtualRegister(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
| .addImm(0) |
| .addReg(Op0, getKillRegState(Op0IsKill)) |
| .addImm(AArch64::sub_32); |
| Op0 = TmpReg; |
| Op0IsKill = true; |
| } |
| return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
| } |
| |
| unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, |
| bool IsZExt) { |
| assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); |
| |
| // FastISel does not have plumbing to deal with extensions where the SrcVT or |
| // DestVT are odd things, so test to make sure that they are both types we can |
| // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise |
| // bail out to SelectionDAG. |
| if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && |
| (DestVT != MVT::i32) && (DestVT != MVT::i64)) || |
| ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && |
| (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) |
| return 0; |
| |
| unsigned Opc; |
| unsigned Imm = 0; |
| |
| switch (SrcVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| return emiti1Ext(SrcReg, DestVT, IsZExt); |
| case MVT::i8: |
| if (DestVT == MVT::i64) |
| Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
| else |
| Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
| Imm = 7; |
| break; |
| case MVT::i16: |
| if (DestVT == MVT::i64) |
| Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
| else |
| Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
| Imm = 15; |
| break; |
| case MVT::i32: |
| assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); |
| Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
| Imm = 31; |
| break; |
| } |
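|   // The bitfield-move immediates select the low Imm+1 bits of the source, e.g.
|   // UBFMWri Wd, Wn, #0, #7 is the canonical form of UXTB and
|   // SBFMXri Xd, Xn, #0, #31 is the canonical form of SXTW.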
| |
| // Handle i8 and i16 as i32. |
| if (DestVT == MVT::i8 || DestVT == MVT::i16) |
| DestVT = MVT::i32; |
| else if (DestVT == MVT::i64) { |
| Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), Src64) |
| .addImm(0) |
| .addReg(SrcReg) |
| .addImm(AArch64::sub_32); |
| SrcReg = Src64; |
| } |
| |
| const TargetRegisterClass *RC = |
| (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); |
| } |
| |
| static bool isZExtLoad(const MachineInstr *LI) { |
| switch (LI->getOpcode()) { |
| default: |
| return false; |
| case AArch64::LDURBBi: |
| case AArch64::LDURHHi: |
| case AArch64::LDURWi: |
| case AArch64::LDRBBui: |
| case AArch64::LDRHHui: |
| case AArch64::LDRWui: |
| case AArch64::LDRBBroX: |
| case AArch64::LDRHHroX: |
| case AArch64::LDRWroX: |
| case AArch64::LDRBBroW: |
| case AArch64::LDRHHroW: |
| case AArch64::LDRWroW: |
| return true; |
| } |
| } |
| |
| static bool isSExtLoad(const MachineInstr *LI) { |
| switch (LI->getOpcode()) { |
| default: |
| return false; |
| case AArch64::LDURSBWi: |
| case AArch64::LDURSHWi: |
| case AArch64::LDURSBXi: |
| case AArch64::LDURSHXi: |
| case AArch64::LDURSWi: |
| case AArch64::LDRSBWui: |
| case AArch64::LDRSHWui: |
| case AArch64::LDRSBXui: |
| case AArch64::LDRSHXui: |
| case AArch64::LDRSWui: |
| case AArch64::LDRSBWroX: |
| case AArch64::LDRSHWroX: |
| case AArch64::LDRSBXroX: |
| case AArch64::LDRSHXroX: |
| case AArch64::LDRSWroX: |
| case AArch64::LDRSBWroW: |
| case AArch64::LDRSHWroW: |
| case AArch64::LDRSBXroW: |
| case AArch64::LDRSHXroW: |
| case AArch64::LDRSWroW: |
| return true; |
| } |
| } |
| |
| bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, |
| MVT SrcVT) { |
| const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); |
| if (!LI || !LI->hasOneUse()) |
| return false; |
| |
| // Check if the load instruction has already been selected. |
| unsigned Reg = lookUpRegForValue(LI); |
| if (!Reg) |
| return false; |
| |
| MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
| if (!MI) |
| return false; |
| |
|   // Check if the right flavour of load instruction has been emitted - the load
|   // may have been selected as a zero-extending load while we need a
|   // sign-extending one, or vice versa.
| bool IsZExt = isa<ZExtInst>(I); |
| const auto *LoadMI = MI; |
| if (LoadMI->getOpcode() == TargetOpcode::COPY && |
| LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { |
| Register LoadReg = MI->getOperand(1).getReg(); |
| LoadMI = MRI.getUniqueVRegDef(LoadReg); |
| assert(LoadMI && "Expected valid instruction"); |
| } |
| if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) |
| return false; |
| |
| // Nothing to be done. |
| if (RetVT != MVT::i64 || SrcVT > MVT::i32) { |
| updateValueMap(I, Reg); |
| return true; |
| } |
| |
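|   // The load already produced the correctly extended value; it only needs to
|   // be widened to 64 bits. For zext a SUBREG_TO_REG suffices because the
|   // 32-bit load zeroed the upper bits; for sext reuse the 64-bit register
|   // defined by the sign-extending load and delete the now dead truncating COPY.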
| if (IsZExt) { |
| unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), Reg64) |
| .addImm(0) |
| .addReg(Reg, getKillRegState(true)) |
| .addImm(AArch64::sub_32); |
| Reg = Reg64; |
| } else { |
| assert((MI->getOpcode() == TargetOpcode::COPY && |
| MI->getOperand(1).getSubReg() == AArch64::sub_32) && |
| "Expected copy instruction"); |
| Reg = MI->getOperand(1).getReg(); |
| MachineBasicBlock::iterator I(MI); |
| removeDeadCode(I, std::next(I)); |
| } |
| updateValueMap(I, Reg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectIntExt(const Instruction *I) { |
| assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
| "Unexpected integer extend instruction."); |
| MVT RetVT; |
| MVT SrcVT; |
| if (!isTypeSupported(I->getType(), RetVT)) |
| return false; |
| |
| if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) |
| return false; |
| |
| // Try to optimize already sign-/zero-extended values from load instructions. |
| if (optimizeIntExtLoad(I, RetVT, SrcVT)) |
| return true; |
| |
| unsigned SrcReg = getRegForValue(I->getOperand(0)); |
| if (!SrcReg) |
| return false; |
| bool SrcIsKill = hasTrivialKill(I->getOperand(0)); |
| |
| // Try to optimize already sign-/zero-extended values from function arguments. |
| bool IsZExt = isa<ZExtInst>(I); |
| if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { |
| if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { |
| if (RetVT == MVT::i64 && SrcVT != MVT::i64) { |
| unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
| TII.get(AArch64::SUBREG_TO_REG), ResultReg) |
| .addImm(0) |
| .addReg(SrcReg, getKillRegState(SrcIsKill)) |
| .addImm(AArch64::sub_32); |
| SrcReg = ResultReg; |
| } |
|       // Conservatively clear all kill flags from all uses, because we are
|       // replacing a sign-/zero-extend instruction at the IR level with a nop at
|       // the MI level. The result of the instruction at the IR level might have
|       // been trivially dead, which is no longer true.
| unsigned UseReg = lookUpRegForValue(I); |
| if (UseReg) |
| MRI.clearKillFlags(UseReg); |
| |
| updateValueMap(I, SrcReg); |
| return true; |
| } |
| } |
| |
| unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { |
| EVT DestEVT = TLI.getValueType(DL, I->getType(), true); |
| if (!DestEVT.isSimple()) |
| return false; |
| |
| MVT DestVT = DestEVT.getSimpleVT(); |
| if (DestVT != MVT::i64 && DestVT != MVT::i32) |
| return false; |
| |
| unsigned DivOpc; |
| bool Is64bit = (DestVT == MVT::i64); |
| switch (ISDOpcode) { |
| default: |
| return false; |
| case ISD::SREM: |
| DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; |
| break; |
| case ISD::UREM: |
| DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; |
| break; |
| } |
| unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; |
| unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
| if (!Src0Reg) |
| return false; |
| bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
| |
| unsigned Src1Reg = getRegForValue(I->getOperand(1)); |
| if (!Src1Reg) |
| return false; |
| bool Src1IsKill = hasTrivialKill(I->getOperand(1)); |
| |
| const TargetRegisterClass *RC = |
| (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, |
| Src1Reg, /*IsKill=*/false); |
| assert(QuotReg && "Unexpected DIV instruction emission failure."); |
| // The remainder is computed as numerator - (quotient * denominator) using the |
| // MSUB instruction. |
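|   // E.g. for a 32-bit srem this emits:
|   //   sdiv wQ, wNum, wDen
|   //   msub wRes, wQ, wDen, wNum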
| unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, |
| Src1Reg, Src1IsKill, Src0Reg, |
| Src0IsKill); |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectMul(const Instruction *I) { |
| MVT VT; |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| if (VT.isVector()) |
| return selectBinaryOp(I, ISD::MUL); |
| |
| const Value *Src0 = I->getOperand(0); |
| const Value *Src1 = I->getOperand(1); |
| if (const auto *C = dyn_cast<ConstantInt>(Src0)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(Src0, Src1); |
| |
| // Try to simplify to a shift instruction. |
| if (const auto *C = dyn_cast<ConstantInt>(Src1)) |
| if (C->getValue().isPowerOf2()) { |
| uint64_t ShiftVal = C->getValue().logBase2(); |
| MVT SrcVT = VT; |
| bool IsZExt = true; |
| if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { |
| if (!isIntExtFree(ZExt)) { |
| MVT VT; |
| if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { |
| SrcVT = VT; |
| IsZExt = true; |
| Src0 = ZExt->getOperand(0); |
| } |
| } |
| } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { |
| if (!isIntExtFree(SExt)) { |
| MVT VT; |
| if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { |
| SrcVT = VT; |
| IsZExt = false; |
| Src0 = SExt->getOperand(0); |
| } |
| } |
| } |
| |
| unsigned Src0Reg = getRegForValue(Src0); |
| if (!Src0Reg) |
| return false; |
| bool Src0IsKill = hasTrivialKill(Src0); |
| |
| unsigned ResultReg = |
| emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); |
| |
| if (ResultReg) { |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| } |
| |
| unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
| if (!Src0Reg) |
| return false; |
| bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
| |
| unsigned Src1Reg = getRegForValue(I->getOperand(1)); |
| if (!Src1Reg) |
| return false; |
| bool Src1IsKill = hasTrivialKill(I->getOperand(1)); |
| |
| unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); |
| |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectShift(const Instruction *I) { |
| MVT RetVT; |
| if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| if (RetVT.isVector()) |
| return selectOperator(I, I->getOpcode()); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { |
| unsigned ResultReg = 0; |
| uint64_t ShiftVal = C->getZExtValue(); |
| MVT SrcVT = RetVT; |
| bool IsZExt = I->getOpcode() != Instruction::AShr; |
| const Value *Op0 = I->getOperand(0); |
| if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { |
| if (!isIntExtFree(ZExt)) { |
| MVT TmpVT; |
| if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { |
| SrcVT = TmpVT; |
| IsZExt = true; |
| Op0 = ZExt->getOperand(0); |
| } |
| } |
| } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { |
| if (!isIntExtFree(SExt)) { |
| MVT TmpVT; |
| if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { |
| SrcVT = TmpVT; |
| IsZExt = false; |
| Op0 = SExt->getOperand(0); |
| } |
| } |
| } |
| |
| unsigned Op0Reg = getRegForValue(Op0); |
| if (!Op0Reg) |
| return false; |
| bool Op0IsKill = hasTrivialKill(Op0); |
| |
| switch (I->getOpcode()) { |
| default: llvm_unreachable("Unexpected instruction."); |
| case Instruction::Shl: |
| ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
| break; |
| case Instruction::AShr: |
| ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
| break; |
| case Instruction::LShr: |
| ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
| break; |
| } |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| unsigned Op0Reg = getRegForValue(I->getOperand(0)); |
| if (!Op0Reg) |
| return false; |
| bool Op0IsKill = hasTrivialKill(I->getOperand(0)); |
| |
| unsigned Op1Reg = getRegForValue(I->getOperand(1)); |
| if (!Op1Reg) |
| return false; |
| bool Op1IsKill = hasTrivialKill(I->getOperand(1)); |
| |
| unsigned ResultReg = 0; |
| switch (I->getOpcode()) { |
| default: llvm_unreachable("Unexpected instruction."); |
| case Instruction::Shl: |
| ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
| break; |
| case Instruction::AShr: |
| ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
| break; |
| case Instruction::LShr: |
| ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
| break; |
| } |
| |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectBitCast(const Instruction *I) { |
| MVT RetVT, SrcVT; |
| |
| if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) |
| return false; |
| if (!isTypeLegal(I->getType(), RetVT)) |
| return false; |
| |
| unsigned Opc; |
| if (RetVT == MVT::f32 && SrcVT == MVT::i32) |
| Opc = AArch64::FMOVWSr; |
| else if (RetVT == MVT::f64 && SrcVT == MVT::i64) |
| Opc = AArch64::FMOVXDr; |
| else if (RetVT == MVT::i32 && SrcVT == MVT::f32) |
| Opc = AArch64::FMOVSWr; |
| else if (RetVT == MVT::i64 && SrcVT == MVT::f64) |
| Opc = AArch64::FMOVDXr; |
| else |
| return false; |
| |
| const TargetRegisterClass *RC = nullptr; |
| switch (RetVT.SimpleTy) { |
| default: llvm_unreachable("Unexpected value type."); |
| case MVT::i32: RC = &AArch64::GPR32RegClass; break; |
| case MVT::i64: RC = &AArch64::GPR64RegClass; break; |
| case MVT::f32: RC = &AArch64::FPR32RegClass; break; |
| case MVT::f64: RC = &AArch64::FPR64RegClass; break; |
| } |
| unsigned Op0Reg = getRegForValue(I->getOperand(0)); |
| if (!Op0Reg) |
| return false; |
| bool Op0IsKill = hasTrivialKill(I->getOperand(0)); |
| unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); |
| |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectFRem(const Instruction *I) { |
| MVT RetVT; |
| if (!isTypeLegal(I->getType(), RetVT)) |
| return false; |
| |
| RTLIB::Libcall LC; |
| switch (RetVT.SimpleTy) { |
| default: |
| return false; |
| case MVT::f32: |
| LC = RTLIB::REM_F32; |
| break; |
| case MVT::f64: |
| LC = RTLIB::REM_F64; |
| break; |
| } |
| |
| ArgListTy Args; |
| Args.reserve(I->getNumOperands()); |
| |
| // Populate the argument list. |
| for (auto &Arg : I->operands()) { |
| ArgListEntry Entry; |
| Entry.Val = Arg; |
| Entry.Ty = Arg->getType(); |
| Args.push_back(Entry); |
| } |
| |
| CallLoweringInfo CLI; |
| MCContext &Ctx = MF->getContext(); |
| CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), |
| TLI.getLibcallName(LC), std::move(Args)); |
| if (!lowerCallTo(CLI)) |
| return false; |
| updateValueMap(I, CLI.ResultReg); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectSDiv(const Instruction *I) { |
| MVT VT; |
| if (!isTypeLegal(I->getType(), VT)) |
| return false; |
| |
| if (!isa<ConstantInt>(I->getOperand(1))) |
| return selectBinaryOp(I, ISD::SDIV); |
| |
| const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); |
| if ((VT != MVT::i32 && VT != MVT::i64) || !C || |
| !(C.isPowerOf2() || (-C).isPowerOf2())) |
| return selectBinaryOp(I, ISD::SDIV); |
| |
| unsigned Lg2 = C.countTrailingZeros(); |
| unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
| if (!Src0Reg) |
| return false; |
| bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
| |
| if (cast<BinaryOperator>(I)->isExact()) { |
| unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); |
| if (!ResultReg) |
| return false; |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
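|   // Otherwise emit the standard round-toward-zero sequence: add 2^Lg2 - 1 to
|   // the numerator only when it is negative, then shift. Roughly:
|   //   add  wT, wN, #(2^Lg2 - 1)
|   //   cmp  wN, #0
|   //   csel wT, wT, wN, lt
|   //   asr  wR, wT, #Lg2   (negated via a subtract from the zero register when
|   //                        the divisor is negative)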
| int64_t Pow2MinusOne = (1ULL << Lg2) - 1; |
| unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); |
| if (!AddReg) |
| return false; |
| |
| // (Src0 < 0) ? Pow2 - 1 : 0; |
| if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) |
| return false; |
| |
| unsigned SelectOpc; |
| const TargetRegisterClass *RC; |
| if (VT == MVT::i64) { |
| SelectOpc = AArch64::CSELXr; |
| RC = &AArch64::GPR64RegClass; |
| } else { |
| SelectOpc = AArch64::CSELWr; |
| RC = &AArch64::GPR32RegClass; |
| } |
| unsigned SelectReg = |
| fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, |
| Src0IsKill, AArch64CC::LT); |
| if (!SelectReg) |
| return false; |
| |
| // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also |
| // negate the result. |
| unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| unsigned ResultReg; |
| if (C.isNegative()) |
| ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, |
| SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); |
| else |
| ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); |
| |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We |
| /// have to duplicate it for AArch64, because otherwise we would fail during the |
| /// sign-extend emission. |
| std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { |
| unsigned IdxN = getRegForValue(Idx); |
| if (IdxN == 0) |
| // Unhandled operand. Halt "fast" selection and bail. |
| return std::pair<unsigned, bool>(0, false); |
| |
| bool IdxNIsKill = hasTrivialKill(Idx); |
| |
| // If the index is smaller or larger than intptr_t, truncate or extend it. |
| MVT PtrVT = TLI.getPointerTy(DL); |
| EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); |
| if (IdxVT.bitsLT(PtrVT)) { |
| IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); |
| IdxNIsKill = true; |
| } else if (IdxVT.bitsGT(PtrVT)) |
| llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); |
| return std::pair<unsigned, bool>(IdxN, IdxNIsKill); |
| } |
| |
| /// This is mostly a copy of the existing FastISel GEP code, but we have to |
| /// duplicate it for AArch64, because otherwise we would bail out even for |
| /// simple cases. This is because the standard fastEmit functions don't cover |
| /// MUL at all and ADD is lowered very inefficiently.
| bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { |
| if (Subtarget->isTargetILP32()) |
| return false; |
| |
| unsigned N = getRegForValue(I->getOperand(0)); |
| if (!N) |
| return false; |
| bool NIsKill = hasTrivialKill(I->getOperand(0)); |
| |
| // Keep a running tab of the total offset to coalesce multiple N = N + Offset |
| // into a single N = N + TotalOffset. |
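|   // E.g. "getelementptr i32, i32* %p, i64 %i" becomes N = %p + %i * 4, and any
|   // constant-index contributions are folded into a single trailing add.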
| uint64_t TotalOffs = 0; |
| MVT VT = TLI.getPointerTy(DL); |
| for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); |
| GTI != E; ++GTI) { |
| const Value *Idx = GTI.getOperand(); |
| if (auto *StTy = GTI.getStructTypeOrNull()) { |
| unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); |
| // N = N + Offset |
| if (Field) |
| TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); |
| } else { |
| Type *Ty = GTI.getIndexedType(); |
| |
| // If this is a constant subscript, handle it quickly. |
| if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { |
| if (CI->isZero()) |
| continue; |
| // N = N + Offset |
| TotalOffs += |
| DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); |
| continue; |
| } |
| if (TotalOffs) { |
| N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); |
| if (!N) |
| return false; |
| NIsKill = true; |
| TotalOffs = 0; |
| } |
| |
| // N = N + Idx * ElementSize; |
| uint64_t ElementSize = DL.getTypeAllocSize(Ty); |
| std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); |
| unsigned IdxN = Pair.first; |
| bool IdxNIsKill = Pair.second; |
| if (!IdxN) |
| return false; |
| |
| if (ElementSize != 1) { |
| unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); |
| if (!C) |
| return false; |
| IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); |
| if (!IdxN) |
| return false; |
| IdxNIsKill = true; |
| } |
| N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); |
| if (!N) |
| return false; |
| } |
| } |
| if (TotalOffs) { |
| N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); |
| if (!N) |
| return false; |
| } |
| updateValueMap(I, N); |
| return true; |
| } |
| |
| bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { |
| assert(TM.getOptLevel() == CodeGenOpt::None && |
| "cmpxchg survived AtomicExpand at optlevel > -O0"); |
| |
| auto *RetPairTy = cast<StructType>(I->getType()); |
| Type *RetTy = RetPairTy->getTypeAtIndex(0U); |
| assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && |
| "cmpxchg has a non-i1 status result"); |
| |
| MVT VT; |
| if (!isTypeLegal(RetTy, VT)) |
| return false; |
| |
| const TargetRegisterClass *ResRC; |
| unsigned Opc, CmpOpc; |
| // This only supports i32/i64, because i8/i16 aren't legal, and the generic |
| // extractvalue selection doesn't support that. |
| if (VT == MVT::i32) { |
| Opc = AArch64::CMP_SWAP_32; |
| CmpOpc = AArch64::SUBSWrs; |
| ResRC = &AArch64::GPR32RegClass; |
| } else if (VT == MVT::i64) { |
| Opc = AArch64::CMP_SWAP_64; |
| CmpOpc = AArch64::SUBSXrs; |
| ResRC = &AArch64::GPR64RegClass; |
| } else { |
| return false; |
| } |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| |
| const unsigned AddrReg = constrainOperandRegClass( |
| II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); |
| const unsigned DesiredReg = constrainOperandRegClass( |
| II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); |
| const unsigned NewReg = constrainOperandRegClass( |
| II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); |
| |
| const unsigned ResultReg1 = createResultReg(ResRC); |
| const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass); |
| const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass); |
| |
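|   // Emit the CMP_SWAP pseudo (expanded after register allocation, typically
|   // into a load-exclusive/store-exclusive loop), then compare the value it
|   // loaded against the expected value and materialize the i1 success flag
|   // with CSINC.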
| // FIXME: MachineMemOperand doesn't support cmpxchg yet. |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
| .addDef(ResultReg1) |
| .addDef(ScratchReg) |
| .addUse(AddrReg) |
| .addUse(DesiredReg) |
| .addUse(NewReg); |
| |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) |
| .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) |
| .addUse(ResultReg1) |
| .addUse(DesiredReg) |
| .addImm(0); |
| |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) |
| .addDef(ResultReg2) |
| .addUse(AArch64::WZR) |
| .addUse(AArch64::WZR) |
| .addImm(AArch64CC::NE); |
| |
| assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); |
| updateValueMap(I, ResultReg1, 2); |
| return true; |
| } |
| |
| bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { |
| switch (I->getOpcode()) { |
| default: |
| break; |
| case Instruction::Add: |
| case Instruction::Sub: |
| return selectAddSub(I); |
| case Instruction::Mul: |
| return selectMul(I); |
| case Instruction::SDiv: |
| return selectSDiv(I); |
| case Instruction::SRem: |
| if (!selectBinaryOp(I, ISD::SREM)) |
| return selectRem(I, ISD::SREM); |
| return true; |
| case Instruction::URem: |
| if (!selectBinaryOp(I, ISD::UREM)) |
| return selectRem(I, ISD::UREM); |
| return true; |
| case Instruction::Shl: |
| case Instruction::LShr: |
| case Instruction::AShr: |
| return selectShift(I); |
| case Instruction::And: |
| case Instruction::Or: |
| case Instruction::Xor: |
| return selectLogicalOp(I); |
| case Instruction::Br: |
| return selectBranch(I); |
| case Instruction::IndirectBr: |
| return selectIndirectBr(I); |
| case Instruction::BitCast: |
| if (!FastISel::selectBitCast(I)) |
| return selectBitCast(I); |
| return true; |
| case Instruction::FPToSI: |
| if (!selectCast(I, ISD::FP_TO_SINT)) |
| return selectFPToInt(I, /*Signed=*/true); |
| return true; |
| case Instruction::FPToUI: |
| return selectFPToInt(I, /*Signed=*/false); |
| case Instruction::ZExt: |
| case Instruction::SExt: |
| return selectIntExt(I); |
| case Instruction::Trunc: |
| if (!selectCast(I, ISD::TRUNCATE)) |
| return selectTrunc(I); |
| return true; |
| case Instruction::FPExt: |
| return selectFPExt(I); |
| case Instruction::FPTrunc: |
| return selectFPTrunc(I); |
| case Instruction::SIToFP: |
| if (!selectCast(I, ISD::SINT_TO_FP)) |
| return selectIntToFP(I, /*Signed=*/true); |
| return true; |
| case Instruction::UIToFP: |
| return selectIntToFP(I, /*Signed=*/false); |
| case Instruction::Load: |
| return selectLoad(I); |
| case Instruction::Store: |
| return selectStore(I); |
| case Instruction::FCmp: |
| case Instruction::ICmp: |
| return selectCmp(I); |
| case Instruction::Select: |
| return selectSelect(I); |
| case Instruction::Ret: |
| return selectRet(I); |
| case Instruction::FRem: |
| return selectFRem(I); |
| case Instruction::GetElementPtr: |
| return selectGetElementPtr(I); |
| case Instruction::AtomicCmpXchg: |
| return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); |
| } |
| |
|   // Fall back to target-independent instruction selection.
| return selectOperator(I, I->getOpcode()); |
| } |
| |
| namespace llvm { |
| |
| FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, |
| const TargetLibraryInfo *LibInfo) { |
| return new AArch64FastISel(FuncInfo, LibInfo); |
| } |
| |
| } // end namespace llvm |