//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"
namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, since it gives
    // better stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
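// E.g., an unsigned less-than (ult) maps to the LO/CC condition, so the
// result can be set with a "movlo"-style conditional move.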

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
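// E.g., an unsigned compare can test the high words first (cmp) and only
// consider the low words when they are equal, while a signed compare
// subtracts the low words first and folds in the high words with sbc so
// the flags reflect the full 64-bit result; C1 and C2 then set the 1 or 0
// value via two conditional moves.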
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
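// E.g., with the 16-byte stack alignment, applyStackAlignment(20) == 32.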
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
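// E.g., applyStackAlignmentTy(4, IceType_i64) == 8, since i64 has a natural
// alignment of 8 bytes.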
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only
  // aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  FloatRegisters[RegARM32::val] = isFP;                                        \
  VectorRegisters[RegARM32::val] = isFP;                                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as "arguments" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
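  // The emitted operand looks like, e.g., "[sp, #16]" or "[fp, #8]", or just
  // "[sp]" when the offset is zero.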
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
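  // E.g., if r0 already holds an i32 argument, the next i64 argument is
  // assigned the (r2, r3) pair and r1 is skipped.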
  if (NumGPRRegsUsed % 2 != 0) {
    ++NumGPRRegsUsed;
  }
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      int32_t RegNum;
      if (!CC.I32InReg(&RegNum))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 6
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
  // Unlike x86, ARM also has callee-saved float/vector registers.
  // The "vpush" instruction can handle a whole list of float/vector
  // registers, but it only handles contiguous sequences of registers
  // by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point
      // register segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the IP inter-procedural scratch register if needed to legalize
    // the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> DummyRegs;
      InRegs = CC.I64InRegs(&DummyRegs);
    } else {
      assert(Ty == IceType_i32);
      int32_t DummyReg;
      InRegs = CC.I32InReg(&DummyReg);
    }
    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the IP inter-procedural scratch register if needed to legalize
      // the immediate. It shouldn't be live at this point.
      Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                    Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
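  // The bic clears the low 4 bits of the return target (16-byte bundle
  // alignment) and the top 2 bits (keeping the target address below 1GB).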
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing
        // instead. Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset = llvm::cast<ConstantInteger32>(
            Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
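    // E.g., a constant alloca of 20 bytes with 16-byte alignment reserves
    // 32 bytes, keeping SP a multiple of the stack alignment.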
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToVar(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable("Unexpected type");
  case IceType_i8: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i16: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    // ScratchReg isn't going to be used, but we need the
    // side-effect of setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  }
  }
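  // The Z flag is now set iff the divisor is zero: branch around the trap
  // when the divisor is non-zero (NE), otherwise fall through into it.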
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}

void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToVar(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
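  // With hardware divide support, emit udiv/sdiv directly (plus an mls to
  // recover the remainder); otherwise fall back to a runtime helper call.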
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(T0R);
    Call->addArg(T1R);
    lowerCall(Call);
  }
  return;
}

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
  // to legalize Src0 to flex or Src1 to flex and there is a reversible
  // instruction. E.g., reverse subtract with immediate, register vs
  // register, immediate.
  // Or it may be the case that the operands aren't swapped, but the
  // bits can be flipped and a different operation applied.
  // E.g., use BIC (bit clear) instead of AND for some masks.
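  // For instance, "a = b & 0xFFFFFF00" could be emitted as
  // "bic a, b, #0xFF", since 0xFF is an encodable immediate while
  // 0xFFFFFF00 is not.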
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    // These helper-call-involved instructions are lowered in this
    // separate switch. This is because we would otherwise assume that
    // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
    // with helper calls, and such unused/redundant instructions will fail
    // liveness analysis under -Om1 setting.
    switch (Inst->getOp()) {
    default:
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem: {
      // Check for divide by 0 (ARM normally doesn't trap, but we want it
      // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
      // to a register, which will hide a constant source operand.
      // Instead, check the not-yet-legalized Src1 to optimize-out a divide
      // by 0 check.
      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
        if (C64->getValue() == 0) {
          _trap();
          return;
        }
      } else {
        Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
        Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
        div0Check(IceType_i64, Src1Lo, Src1Hi);
      }
      // Technically, ARM has their own aeabi routines, but we can use the
      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
      // but uses the more standard __moddi3 for rem.
      const char *HelperName = "";
      switch (Inst->getOp()) {
      default:
        llvm_unreachable("Should have only matched div ops.");
        break;
      case InstArithmetic::Udiv:
        HelperName = H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperName = H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperName = H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperName = H_srem_i64;
        break;
      }
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
      return;
    }
    }
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *Src0RLo = legalizeToVar(loOperand(Src0));
    Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = makeReg(DestLo->getType());
    Variable *T_Hi = makeReg(DestHi->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      return;
    case InstArithmetic::Add:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _adds(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _adc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::And:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _and(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _and(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Or:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _orr(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _orr(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Xor:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _eor(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _eor(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Sub:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _subs(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _sbc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Mul: {
      // GCC 4.8 does:
      // a=b*c ==>
      //   t_acc =(mul) (b.lo * c.hi)
      //   t_acc =(mla) (c.lo * b.hi) + t_acc
      //   t.hi,t.lo =(umull) b.lo * c.lo
      //   t.hi += t_acc
      //   a.lo = t.lo
      //   a.hi = t.hi
      //
      // LLVM does:
      //   t.hi,t.lo =(umull) b.lo * c.lo
      //   t.hi =(mla) (b.lo * c.hi) + t.hi
      //   t.hi =(mla) (b.hi * c.lo) + t.hi
      //   a.lo = t.lo
      //   a.hi = t.hi
      //
      // LLVM's lowering has fewer instructions, but more register pressure:
      // t.lo is live from beginning to end, while GCC delays the two-dest
      // instruction till the end, and kills c.hi immediately.
      Variable *T_Acc = makeReg(IceType_i32);
      Variable *T_Acc1 = makeReg(IceType_i32);
      Variable *T_Hi1 = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Variable *Src1RHi = legalizeToVar(Src1Hi);
      _mul(T_Acc, Src0RLo, Src1RHi);
      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
      _add(T_Hi, T_Hi1, T_Acc1);
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      // GCC 4.8 does:
      //   sub t_c1, c.lo, #32
      //   lsl t_hi, b.hi, c.lo
      //   orr t_hi, t_hi, b.lo, lsl t_c1
      //   rsb t_c2, c.lo, #32
      //   orr t_hi, t_hi, b.lo, lsr t_c2
      //   lsl t_lo, b.lo, c.lo
      //   a.lo = t_lo
      //   a.hi = t_hi
      // Can be strength-reduced for constant-shifts, but we don't do
      // that for now.
      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
      // On ARM, shifts only take the lower 8 bits of the shift register,
      // and saturate to the range 0-32, so the negative value will
      // saturate to 32.
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
      // that maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
      return;
    }
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      //   rsb t_c1, c.lo, #32
      //   lsr t_lo, b.lo, c.lo
      //   orr t_lo, t_lo, b.hi, lsl t_c1
      //   sub t_c2, c.lo, #32
      //   orr t_lo, t_lo, b.hi, lsr t_c2
      //   lsr t_hi, b.hi, c.lo
      //   a.lo = t_lo
      //   a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
      // and the next orr should be conditioned on PLUS. The last two
      // right shifts should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      return;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      return;
    }
    return;
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // Dest->getType() is a non-i64 scalar.
  Variable *Src0R = legalizeToVar(Src0);
  Variable *T = makeReg(Dest->getType());
  // Handle div/rem separately. They require a non-legalized Src1 to inspect
  // whether or not Src1 is a non-zero constant. Once legalized it is more
  // difficult to determine (constant may be moved to a register).
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Udiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_udiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Sdiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_sdiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Urem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_urem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Srem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_srem_i32, IsRemainder);
    return;
  }
  }

  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add:
    _add(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::And:
    _and(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    _orr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub:
    _sub(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Mul: {
    Variable *Src1R = legalizeToVar(Src1RF);
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl:
    _lsl(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Lshr:
    _lsr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Ashr:
    _asr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm_unreachable("Integer div/rem should have been handled earlier.");
    return;
  case InstArithmetic::Fadd:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fsub:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fmul:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fdiv:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Frem:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
}
1456
1457void TargetARM32::lowerAssign(const InstAssign *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001458 Variable *Dest = Inst->getDest();
1459 Operand *Src0 = Inst->getSrc(0);
1460 assert(Dest->getType() == Src0->getType());
1461 if (Dest->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07001462 Src0 = legalizeUndef(Src0);
1463 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1464 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
Jan Voungb3401d22015-05-18 09:38:21 -07001465 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1466 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1467 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1468 _mov(T_Lo, Src0Lo);
1469 _mov(DestLo, T_Lo);
1470 _mov(T_Hi, Src0Hi);
1471 _mov(DestHi, T_Hi);
1472 } else {
1473 Operand *SrcR;
1474 if (Dest->hasReg()) {
1475 // If Dest already has a physical register, then legalize the
1476 // Src operand into a Variable with the same register
1477 // assignment. This is mostly a workaround for advanced phi
1478 // lowering's ad-hoc register allocation which assumes no
1479 // register allocation is needed when at least one of the
1480 // operands is non-memory.
1481 // TODO(jvoung): check this for ARM.
1482 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
1483 } else {
1484 // Dest could be a stack operand. Since we could potentially need
1485 // to do a Store (and store can only have Register operands),
1486 // legalize this to a register.
1487 SrcR = legalize(Src0, Legal_Reg);
1488 }
1489 if (isVectorType(Dest->getType())) {
1490 UnimplementedError(Func->getContext()->getFlags());
1491 } else {
1492 _mov(Dest, SrcR);
1493 }
1494 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001495}
1496
1497void TargetARM32::lowerBr(const InstBr *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001498 if (Inst->isUnconditional()) {
1499 _br(Inst->getTargetUnconditional());
1500 return;
1501 }
1502 Operand *Cond = Inst->getCondition();
1503 // TODO(jvoung): Handle folding opportunities.
1504
1505 Variable *Src0R = legalizeToVar(Cond);
1506 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1507 _cmp(Src0R, Zero);
Jan Voung6ec369e2015-06-30 11:03:15 -07001508 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001509}
1510
Jan Voung3bfd99a2015-05-22 16:35:25 -07001511void TargetARM32::lowerCall(const InstCall *Instr) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001512 MaybeLeafFunc = false;
Jan Voungb0a8c242015-06-18 15:00:14 -07001513 NeedsStackAlignment = true;
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001514
Jan Voungb0a8c242015-06-18 15:00:14 -07001515 // Assign arguments to registers and stack. Also reserve stack.
1516 TargetARM32::CallingConv CC;
1517 // Pair of Arg Operand -> GPR number assignments.
1518 llvm::SmallVector<std::pair<Operand *, int32_t>,
1519 TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
1520 // Pair of Arg Operand -> stack offset.
1521 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
1522 int32_t ParameterAreaSizeBytes = 0;
1523
1524 // Classify each argument operand according to the location where the
1525 // argument is passed.
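  // For example (illustrative only): for a call f(i32 a, i64 b, i32 c),
  // a is assigned r0, b is assigned the even/odd pair (r2, r3) since i64
  // arguments start at an even GPR, and c no longer fits in r0-r3, so it
  // is passed on the stack.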
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
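  // E.g. (illustrative only), 12 bytes of outgoing stack arguments would be
  // padded up to 16 under a 16-byte stack alignment before the sp
  // adjustment below.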
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call sp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1 = sxtb src; t2 = t1 asr #31; dst.lo = t1; dst.hi = t2
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0->getType() == IceType_i32) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else if (Src0->getType() == IceType_i1) {
        Variable *Src0R = legalizeToVar(Src0);
        _lsl(T_Lo, Src0R, ShiftAmt);
        _asr(T_Lo, T_Lo, ShiftAmt);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _sxt(T_Lo, Src0R);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
      if (Src0->getType() != IceType_i1) {
        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
                                               OperandARM32::ASR, ShiftAmt));
      } else {
        // For i1, the asr instruction is already done above.
        _mov(T_Hi, T_Lo);
      }
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
      // lsl t1, src_reg, 31
      // asr t1, t1, 31
      // dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *T = makeReg(Dest->getType());
      _lsl(T, Src0R, ShiftAmt);
      _asr(T, T, ShiftAmt);
      _mov(Dest, T);
    } else {
      // t1 = sxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _sxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1 = uxtb src; dst.lo = t1; dst.hi = 0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // i32 and i1 can just take up the whole register.
      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _uxt(T_Lo, Src0R);
      }
      if (Src0->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(T_Lo, T_Lo, One);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestLo->getType());
      _mov(T_Hi, Zero);
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // t = Src0; t &= 1; Dest = t
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      Constant *One = Ctx->getConstantInt32(1);
      Variable *T = makeReg(Dest->getType());
      // Just use _mov instead of _uxt since all registers are 32-bit.
      // _uxt requires the source to be a register, so it could have required
      // a _mov from legalize anyway.
      _mov(T, Src0RF);
      _and(T, T, One);
      _mov(Dest, T);
    } else {
      // t1 = uxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _uxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      // t1 = trunc Src0RF; Dest = t1
      Variable *T = makeReg(Dest->getType());
      _mov(T, Src0RF);
      if (Dest->getType() == IceType_i1)
        _and(T, T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  }
}

void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));

  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp b.hi, c.hi     or  cmp b.lo, c.lo
  //   cmp.eq b.lo, c.lo      sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1         mov.<C1> t, #1
  //   mov.<C2> t, #0         mov.<C2> t, #0
  //   mov a, t               mov a, t
  // where the "cmp.eq b.lo, c.lo" form is used for unsigned and the
  // "sbcs t1, b.hi, c.hi" form is used for signed compares. In some cases,
  // b and c need to be swapped as well.
  //
  // LLVM does, for EQ and NE:
  //   eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
  //   orrs t, t1, t2
  //   mov.<C> t, #1
  //   mov a, t
  //
  // That's nice in that it's just as short but has fewer dependencies
  // for better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
  // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
  // but is a longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
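  // Illustrative sketch only (assuming the usual table mapping for ult,
  // i.e. C1 = LO and C2 = HS): a = icmp ult i64 b, c would become:
  //   cmp   b.hi, c.hi
  //   cmpeq b.lo, c.lo
  //   movlo t, #1
  //   movhs t, #0
  //   mov   a, t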
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Variable *Src0Lo, *Src0Hi;
    Operand *Src1LoRF, *Src1HiRF;
    if (TableIcmp64[Index].Swapped) {
      Src0Lo = legalizeToVar(loOperand(Src1));
      Src0Hi = legalizeToVar(hiOperand(Src1));
      Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    } else {
      Src0Lo = legalizeToVar(loOperand(Src0));
      Src0Hi = legalizeToVar(hiOperand(Src0));
      Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    }
    Variable *T = makeReg(IceType_i32);
    if (TableIcmp64[Index].IsSigned) {
      Variable *ScratchReg = makeReg(IceType_i32);
      _cmp(Src0Lo, Src1LoRF);
      _sbcs(ScratchReg, Src0Hi, Src1HiRF);
      // ScratchReg isn't going to be used, but we need the
      // side-effect of setting flags from this operation.
      Context.insert(InstFakeUse::create(Func, ScratchReg));
    } else {
      _cmp(Src0Hi, Src1HiRF);
      _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
    }
    _mov(T, One, TableIcmp64[Index].C1);
    _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
    _mov(Dest, T);
    return;
  }

  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp tb, tc
  //   mov.C1 t, #0
  //   mov.C2 t, #1
  //   mov a, t
  // where the unsigned/sign extension is not needed for 32-bit.
  // They also have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs t, tb, tc
  //   movne t, #1
  //   mov a, t
  //
  // LLVM does:
  //   lsl tb, b, #<N>
  //   mov t, #0
  //   cmp tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov a, t
  //
  // The left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, LLVM does something similar to GCC and emits a
  // uxtb first; it's not clear to me why that special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
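  // Illustrative sketch only: a = icmp slt i16 b, c (so ShiftAmt == 16)
  // would become:
  //   lsl   tb, b, #16
  //   mov   t, #0
  //   cmp   tb, c, lsl #16
  //   movlt t, #1
  //   mov   a, t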
  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt >= 0);
  Constant *ShiftConst = nullptr;
  Variable *Src0R = nullptr;
  Variable *T = makeReg(IceType_i32);
  if (ShiftAmt) {
    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
    Src0R = makeReg(IceType_i32);
    _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
  } else {
    Src0R = legalizeToVar(Src0);
  }
  _mov(T, Zero);
  if (ShiftAmt) {
    Variable *Src1R = legalizeToVar(Src1);
    OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
    _cmp(Src0R, Src1RShifted);
  } else {
    Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
    _cmp(Src0R, Src1RF);
  }
  _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicFence:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstARM32Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicIsLockFree: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicLoad: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicRMW:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicStore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    Type Ty = Val->getType();
    if (Ty == IceType_i64) {
      Val = legalizeUndef(Val);
      Variable *Val_Lo = legalizeToVar(loOperand(Val));
      Variable *Val_Hi = legalizeToVar(hiOperand(Val));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _rev(T_Lo, Val_Lo);
      _rev(T_Hi, Val_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else {
      assert(Ty == IceType_i32 || Ty == IceType_i16);
      Variable *ValR = legalizeToVar(Val);
      Variable *T = makeReg(Ty);
      _rev(T, ValR);
      if (Val->getType() == IceType_i16) {
        Operand *Sixteen =
            legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
        _lsr(T, T, Sixteen);
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
                                        ? H_call_ctpop_i32
                                        : H_call_ctpop_i64,
                                    Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches some 64-bit platforms' native instructions and
    // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest
    // just in case the user doesn't do that in the IR or doesn't toss the bits
    // via truncate.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToVar(loOperand(Val));
      ValHiR = legalizeToVar(hiOperand(Val));
    } else {
      ValLoR = legalizeToVar(Val);
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Cttz: {
    // Essentially like Clz, but reverse the bits first.
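    // Illustrative sketch only: since cttz(x) == clz(rbit(x)), the i32 form
    // is roughly:
    //   rbit t, x
    //   clz  t, t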
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToVar(loOperand(Val));
      ValHiR = legalizeToVar(hiOperand(Val));
      Variable *TLo = makeReg(IceType_i32);
      Variable *THi = makeReg(IceType_i32);
      _rbit(TLo, ValLoR);
      _rbit(THi, ValHiR);
      ValLoR = THi;
      ValHiR = TLo;
    } else {
      ValLoR = legalizeToVar(Val);
      Variable *T = makeReg(IceType_i32);
      _rbit(T, ValLoR);
      ValLoR = T;
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Fabs: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size because the
    // PNaCl ABI requires arguments to be at least 32 bits wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    // Technically, ARM has its own __aeabi_memset, but we can use plain
    // memset too. The value and size arguments need to be flipped if we ever
    // decide to use __aeabi_memset.
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
    _mov_nonkillable(SP, Val);
    return;
  }
  case Intrinsics::Trap:
    _trap();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
  Type Ty = Dest->getType();
  assert(Ty == IceType_i32 || Ty == IceType_i64);
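  // Illustrative sketch only (register names are not the actual
  // allocations): the i64 path below emits roughly:
  //   clz   t, val.lo
  //   cmp   val.hi, #0
  //   add   t2, t, #32
  //   clzne t2, val.hi
  //   mov   dest.lo, t2
  //   mov   dest.hi, #0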
  Variable *T = makeReg(IceType_i32);
  _clz(T, ValLoR);
  if (Ty == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Zero =
        legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
    Operand *ThirtyTwo =
        legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
    _cmp(ValHiR, Zero);
    Variable *T2 = makeReg(IceType_i32);
    _add(T2, T, ThirtyTwo);
    _clz(T2, ValHiR, CondARM32::NE);
    // T2 is actually a source as well when the predicate is not AL
    // (since it may leave T2 alone). We use set_dest_nonkillable to
    // prolong the liveness of T2 as if it was used as a source.
    _set_dest_nonkillable();
    _mov(DestLo, T2);
    _mov(DestHi, Ctx->getConstantZero(IceType_i32));
    return;
  }
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandARM32Mem operand.
  Type Ty = Load->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  Variable *DestLoad = Load->getDest();

  // TODO(jvoung): handle folding opportunities. Sign and zero extension
  // can be folded into a load.
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

void TargetARM32::doAddressOptLoad() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    UnimplementedError(Func->getContext()->getFlags());
  }
}

void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetARM32::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = Inst->getRetValue();
    if (Src0->getType() == IceType_i64) {
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert(InstFakeUse::create(Func, R1));
    } else if (isScalarFloatingType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isVectorType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  // addEpilog is responsible for restoring the "lr" register as needed
  // prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
  // Add a fake use of sp to make sure sp stays alive for the entire
  // function. Otherwise post-call sp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert(InstFakeUse::create(Func, SP));
}

void TargetARM32::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  if (isFloatingType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // TODO(jvoung): handle folding opportunities.
  // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
  Variable *CmpOpnd0 = legalizeToVar(Condition);
  Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpOpnd0, CmpOpnd1);
  CondARM32::Cond Cond = CondARM32::NE;
  if (DestTy == IceType_i64) {
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }
  Variable *T = nullptr;
  SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
  _mov_nonkillable(T, SrcT, Cond);
  _mov(Dest, T);
}

void TargetARM32::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToVar(hiOperand(Value));
    Variable *ValueLo = legalizeToVar(loOperand(Value));
    _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
    _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    Variable *ValueR = legalizeToVar(Value);
    _str(ValueR, NewAddr);
  }
}

void TargetARM32::doAddressOptStore() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    Src0 = legalizeUndef(Src0);
    Variable *Src0Lo = legalizeToVar(loOperand(Src0));
    Variable *Src0Hi = legalizeToVar(hiOperand(Src0));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
      ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
      ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
      _cmp(Src0Lo, ValueLo);
      _cmp(Src0Hi, ValueHi, CondARM32::EQ);
      _br(Inst->getLabel(I), CondARM32::EQ);
    }
    _br(Inst->getLabelDefault());
    return;
  }

  // 32-bit integer
  Variable *Src0Var = legalizeToVar(Src0);
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
    Value = legalize(Value, Legal_Reg | Legal_Flex);
    _cmp(Src0Var, Value);
    _br(Inst->getLabel(I), CondARM32::EQ);
  }
  _br(Inst->getLabelDefault());
}

void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  _trap();
}

void TargetARM32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
}

// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
void TargetARM32::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  (void)Node;
  (void)Assignments;
  UnimplementedError(Func->getContext()->getFlags());
}

Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  UnimplementedError(Func->getContext()->getFlags());
  return Reg;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    // Mov's Src operand can really only be the flexible second operand type
    // or a register. Users should guarantee that.
    _mov(Reg, Src);
  }
  return Reg;
}

Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);
  // Go through the various types of operands:
  // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
  // Given the above assertion, if the type of operand is not legal
  // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (Mem->isRegReg()) {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, Mem->getOffset(),
                                      Mem->getAddrMode());
      }
    }
    if (!(Allowed & Legal_Mem)) {
      Variable *Reg = makeReg(Ty, RegNum);
      _ldr(Reg, Mem);
      From = Reg;
    } else {
      From = Mem;
    }
    return From;
  }

  if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below,
          // where it may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));
    bool CanBeFlex = Allowed & Legal_Flex;
    if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      // Check if the immediate will fit in a Flexible second operand,
      // if a Flexible second operand is allowed. We need to know the exact
      // value, so that rules out relocatable constants.
      // Also try the inverse and use MVN if possible.
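      // For example (illustrative only): 0x00FF0000 fits directly as a
      // rotated 8-bit immediate, 0xFFFF00FF fits via MVN of its inverse
      // 0x0000FF00, while 0x12345678 fits neither form and falls through
      // to the movw/movt pair below.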
      if (CanBeFlex &&
          OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
      } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
                                  ~Value, &RotateAmt, &Immed_8)) {
        auto InvertedFlex =
            OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvertedFlex);
        return Reg;
      } else {
        // Do a movw/movt to a register.
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      // Load floats/doubles from literal pool.
      UnimplementedError(Func->getContext()->getFlags());
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");

  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // It may be the case that address mode optimization already creates
  // an OperandARM32Mem, so in that case it wouldn't need another level
  // of transformation.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only
  // have a base/offset to work with. ARM always requires a base
  // register, so just use that to hold the operand.
  Variable *Base = legalizeToVar(Operand);
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}

Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits
  // (if it fits at all). Assume Align is usually small, in which case BIC
  // works better. Thus, this rounds down to the alignment.
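  // For example (illustrative only): Align == 16 emits
  //   bic reg, reg, #15
  // and an alignment whose Align - 1 mask cannot be encoded as a rotated
  // 8-bit immediate falls back to the AND form, with the -Align mask
  // legalized into a register or flex operand as needed.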
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
    _and(Reg, Reg, Mask);
  }
}

void TargetARM32::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

void TargetDataARM32::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  UnimplementedError(Ctx->getFlags());
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE.
  // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope
  // sub-subsection of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but use A9 for wider compatibility.
  // For some reason, the LLVM ARM asm parser has the .cpu directive override
  // the mattr specified on the commandline. So to test hwdiv, we need to set
  // the .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

} // end of namespace Ice