//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, since
    // llvm_unreachable gives better stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
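// For example (an illustrative subset of the mapping; ICMPARM32_TABLE is
// authoritative): InstIcmp::Eq maps to CondARM32::EQ, InstIcmp::Ult to
// CondARM32::CC, and InstIcmp::Slt to CondARM32::LT.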

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
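// For example, a signed i64 less-than could be set up along these lines (a
// sketch of the shape implied by the table, not the exact emitted code):
//   cmp     b.lo, c.lo
//   sbcs    t, b.hi, c.hi   ; subtract-with-carry, only to set flags
//   mov.lt  a, #1           ; C1 for slt
//   mov.ge  a, #0           ; C2 for slt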
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macro tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
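// E.g., with the 16-byte ARM32_STACK_ALIGNMENT_BYTES above,
// applyStackAlignment(20) == 32 and applyStackAlignment(16) == 16.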
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
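// E.g., applyStackAlignmentTy(4, IceType_i64) == 8 (natural 8-byte
// alignment), while applyStackAlignmentTy(4, IceType_i32) == 4.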
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only
  // aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
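// E.g., this returns true for the constant 1, and false both for the
// constant 0 and for any non-constant operand (whose value is unknown here).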
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isFP)                                                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  FloatRegisters[RegARM32::val] = isFP;                                        \
  VectorRegisters[RegARM32::val] = isFP;                                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isFP)                                                                \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as "arguments" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}
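// E.g., a register-allocated i32 variable in r4 emits as "r4", while a stack
// slot 8 bytes above the frame/stack register emits as "[fp, #8]" (or
// "[sp, #8]" when no frame pointer is used).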

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
  if (NumGPRRegsUsed % 2 != 0) {
    ++NumGPRRegsUsed;
  }
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}
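// E.g., for f(i32 a, i64 b): a is assigned r0, r1 is skipped as padding so
// that b starts at an even register, and b gets the r2/r3 pair. A following
// i64 argument would no longer fit in registers and would go on the stack.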

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      int32_t RegNum;
      if (!CC.I32InReg(&RegNum))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
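// E.g., for an i64 stack argument starting at InArgsSizeBytes == 0, the Lo
// half is assigned offset BasicFrameOffset + 0 and the Hi half offset
// BasicFrameOffset + 4, matching the little-endian in-memory layout.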
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes:          area 1
  //  * SpillAreaPaddingBytes:           area 2
  //  * GlobalsSize:                     area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize:             area 5
  //  * SpillAreaSizeBytes:              areas 2 - 6
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
  // Unlike x86, ARM also has callee-saved float/vector registers.
  // The "vpush" instruction can handle a whole list of float/vector
  // registers, but it only handles contiguous sequences of registers
  // by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point
      // register segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the IP inter-procedural scratch register if needed to legalize
    // the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> DummyRegs;
      InRegs = CC.I64InRegs(&DummyRegs);
    } else {
      assert(Ty == IceType_i32);
      int32_t DummyReg;
      InRegs = CC.I32InReg(&DummyReg);
    }
    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the IP inter-procedural scratch register if needed to legalize
      // the immediate. It shouldn't be live at this point.
      Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                    Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  //   bundle_lock
  //   bic lr, #0xc000000f
  //   bx lr
  //   bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It also
  // needs to restrict the return target to the lower 1GB.
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing
        // instead. Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
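// E.g., for the i64 constant 0x1122334455667788, loOperand yields the i32
// constant 0x55667788 and hiOperand the i32 constant 0x11223344; for a
// memory operand [r0, #8], the halves are [r0, #8] and [r0, #12].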

llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isFP)                                                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    TotalSize = legalize(TotalSize);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}
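// E.g., "a = alloca 20 bytes, align 4" with the default 16-byte stack
// alignment rounds the size up at compile time and emits roughly:
//   sub sp, sp, #32
//   mov a, sp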

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToVar(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable("Unexpected type");
  case IceType_i8: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i16: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    // ScratchReg isn't going to be used, but we need the
    // side-effect of setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  }
  }
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}
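// E.g., for an i64 divisor held in r0/r1, the i64 case above emits a
// sequence along the lines of (register names illustrative):
//   orrs ip, r0, r1     ; Z flag is set iff the full 64-bit value is zero
//   bne  skip
//   <trap>
// skip: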

void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToVar(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(T0R);
    Call->addArg(T1R);
    lowerCall(Call);
  }
  return;
}
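// E.g., an i32 urem on a CPU with hardware divide becomes (a sketch):
//   udiv t, a, b
//   mls  r, t, b, a     ; r = a - (a/b)*b
// whereas without hardware divide it becomes a call to the H_urem_i32
// helper routine with a and b as arguments.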
1094
Jan Voungb36ad9b2015-04-21 17:01:49 -07001095void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001096 Variable *Dest = Inst->getDest();
1097 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
1098 // to legalize Src0 to flex or Src1 to flex and there is a reversible
1099 // instruction. E.g., reverse subtract with immediate, register vs
1100 // register, immediate.
1101 // Or it may be the case that the operands aren't swapped, but the
1102 // bits can be flipped and a different operation applied.
1103 // E.g., use BIC (bit clear) instead of AND for some masks.
Jan Voung29719972015-05-19 11:24:51 -07001104 Operand *Src0 = Inst->getSrc(0);
1105 Operand *Src1 = Inst->getSrc(1);
Jan Voungb3401d22015-05-18 09:38:21 -07001106 if (Dest->getType() == IceType_i64) {
Jan Voung70fa5252015-07-06 14:01:25 -07001107 // These helper-call-involved instructions are lowered in this
1108 // separate switch. This is because we would otherwise assume that
1109 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
1110 // with helper calls, and such unused/redundant instructions will fail
1111 // liveness analysis under -Om1 setting.
1112 switch (Inst->getOp()) {
1113 default:
1114 break;
1115 case InstArithmetic::Udiv:
1116 case InstArithmetic::Sdiv:
1117 case InstArithmetic::Urem:
1118 case InstArithmetic::Srem: {
1119 // Check for divide by 0 (ARM normally doesn't trap, but we want it
1120 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
1121 // to a register, which will hide a constant source operand.
1122 // Instead, check the not-yet-legalized Src1 to optimize-out a divide
1123 // by 0 check.
1124 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1125 if (C64->getValue() == 0) {
1126 _trap();
1127 return;
1128 }
1129 } else {
1130 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1131 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1132 div0Check(IceType_i64, Src1Lo, Src1Hi);
1133 }
1134 // Technically, ARM has their own aeabi routines, but we can use the
1135 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
1136 // but uses the more standard __moddi3 for rem.
1137 const char *HelperName = "";
1138 switch (Inst->getOp()) {
1139 default:
1140 llvm_unreachable("Should have only matched div ops.");
1141 break;
1142 case InstArithmetic::Udiv:
1143 HelperName = H_udiv_i64;
1144 break;
1145 case InstArithmetic::Sdiv:
1146 HelperName = H_sdiv_i64;
1147 break;
1148 case InstArithmetic::Urem:
1149 HelperName = H_urem_i64;
1150 break;
1151 case InstArithmetic::Srem:
1152 HelperName = H_srem_i64;
1153 break;
1154 }
1155 constexpr SizeT MaxSrcs = 2;
1156 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1157 Call->addArg(Src0);
1158 Call->addArg(Src1);
1159 lowerCall(Call);
1160 return;
1161 }
1162 }
Jan Voung29719972015-05-19 11:24:51 -07001163 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1164 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1165 Variable *Src0RLo = legalizeToVar(loOperand(Src0));
1166 Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
Jan Voung70fa5252015-07-06 14:01:25 -07001167 Operand *Src1Lo = loOperand(Src1);
1168 Operand *Src1Hi = hiOperand(Src1);
Jan Voung29719972015-05-19 11:24:51 -07001169 Variable *T_Lo = makeReg(DestLo->getType());
1170 Variable *T_Hi = makeReg(DestHi->getType());
1171 switch (Inst->getOp()) {
1172 case InstArithmetic::_num:
1173 llvm_unreachable("Unknown arithmetic operator");
Jan Voung70fa5252015-07-06 14:01:25 -07001174 return;
Jan Voung29719972015-05-19 11:24:51 -07001175 case InstArithmetic::Add:
Jan Voung70fa5252015-07-06 14:01:25 -07001176 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1177 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001178 _adds(T_Lo, Src0RLo, Src1Lo);
1179 _mov(DestLo, T_Lo);
1180 _adc(T_Hi, Src0RHi, Src1Hi);
1181 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001182 return;
Jan Voung29719972015-05-19 11:24:51 -07001183 case InstArithmetic::And:
Jan Voung70fa5252015-07-06 14:01:25 -07001184 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1185 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001186 _and(T_Lo, Src0RLo, Src1Lo);
1187 _mov(DestLo, T_Lo);
1188 _and(T_Hi, Src0RHi, Src1Hi);
1189 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001190 return;
Jan Voung29719972015-05-19 11:24:51 -07001191 case InstArithmetic::Or:
Jan Voung70fa5252015-07-06 14:01:25 -07001192 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1193 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001194 _orr(T_Lo, Src0RLo, Src1Lo);
1195 _mov(DestLo, T_Lo);
1196 _orr(T_Hi, Src0RHi, Src1Hi);
1197 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001198 return;
Jan Voung29719972015-05-19 11:24:51 -07001199 case InstArithmetic::Xor:
Jan Voung70fa5252015-07-06 14:01:25 -07001200 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1201 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001202 _eor(T_Lo, Src0RLo, Src1Lo);
1203 _mov(DestLo, T_Lo);
1204 _eor(T_Hi, Src0RHi, Src1Hi);
1205 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001206 return;
Jan Voung29719972015-05-19 11:24:51 -07001207 case InstArithmetic::Sub:
Jan Voung70fa5252015-07-06 14:01:25 -07001208 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1209 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001210 _subs(T_Lo, Src0RLo, Src1Lo);
1211 _mov(DestLo, T_Lo);
1212 _sbc(T_Hi, Src0RHi, Src1Hi);
1213 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001214 return;
Jan Voung29719972015-05-19 11:24:51 -07001215 case InstArithmetic::Mul: {
1216 // GCC 4.8 does:
1217 // a=b*c ==>
1218 // t_acc =(mul) (b.lo * c.hi)
1219 // t_acc =(mla) (c.lo * b.hi) + t_acc
1220 // t.hi,t.lo =(umull) b.lo * c.lo
1221 // t.hi += t_acc
1222 // a.lo = t.lo
1223 // a.hi = t.hi
1224 //
1225 // LLVM does:
1226 // t.hi,t.lo =(umull) b.lo * c.lo
1227 // t.hi =(mla) (b.lo * c.hi) + t.hi
1228 // t.hi =(mla) (b.hi * c.lo) + t.hi
1229 // a.lo = t.lo
1230 // a.hi = t.hi
1231 //
1232 // LLVM's lowering has fewer instructions, but more register pressure:
1233 // t.lo is live from beginning to end, while GCC delays the two-dest
1234 // instruction till the end, and kills c.hi immediately.
1235 Variable *T_Acc = makeReg(IceType_i32);
1236 Variable *T_Acc1 = makeReg(IceType_i32);
1237 Variable *T_Hi1 = makeReg(IceType_i32);
1238 Variable *Src1RLo = legalizeToVar(Src1Lo);
1239 Variable *Src1RHi = legalizeToVar(Src1Hi);
1240 _mul(T_Acc, Src0RLo, Src1RHi);
1241 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1242 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1243 _add(T_Hi, T_Hi1, T_Acc1);
1244 _mov(DestLo, T_Lo);
1245 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001246 return;
1247 }
Jan Voung66c3d5e2015-06-04 17:02:31 -07001248 case InstArithmetic::Shl: {
1249 // a=b<<c ==>
1250 // GCC 4.8 does:
1251 // sub t_c1, c.lo, #32
1252 // lsl t_hi, b.hi, c.lo
1253 // orr t_hi, t_hi, b.lo, lsl t_c1
1254 // rsb t_c2, c.lo, #32
1255 // orr t_hi, t_hi, b.lo, lsr t_c2
1256 // lsl t_lo, b.lo, c.lo
1257 // a.lo = t_lo
1258 // a.hi = t_hi
1259 // Can be strength-reduced for constant-shifts, but we don't do
1260 // that for now.
1261 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
1262 // On ARM, shifts only take the lower 8 bits of the shift register,
1263 // and saturate to the range 0-32, so the negative value will
1264 // saturate to 32.
1265 Variable *T_Hi = makeReg(IceType_i32);
1266 Variable *Src1RLo = legalizeToVar(Src1Lo);
1267 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1268 Variable *T_C1 = makeReg(IceType_i32);
1269 Variable *T_C2 = makeReg(IceType_i32);
1270 _sub(T_C1, Src1RLo, ThirtyTwo);
1271 _lsl(T_Hi, Src0RHi, Src1RLo);
1272 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1273 OperandARM32::LSL, T_C1));
1274 _rsb(T_C2, Src1RLo, ThirtyTwo);
1275 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1276 OperandARM32::LSR, T_C2));
1277 _mov(DestHi, T_Hi);
1278 Variable *T_Lo = makeReg(IceType_i32);
1279 // _mov seems to sometimes have better register preferencing than lsl.
1280 // Otherwise mov w/ lsl shifted register is a pseudo-instruction
1281 // that maps to lsl.
1282 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1283 OperandARM32::LSL, Src1RLo));
1284 _mov(DestLo, T_Lo);
Jan Voung70fa5252015-07-06 14:01:25 -07001285 return;
1286 }
Jan Voung29719972015-05-19 11:24:51 -07001287 case InstArithmetic::Lshr:
Jan Voung66c3d5e2015-06-04 17:02:31 -07001288 // a=b>>c (unsigned) ==>
1289 // GCC 4.8 does:
1290 // rsb t_c1, c.lo, #32
1291 // lsr t_lo, b.lo, c.lo
1292 // orr t_lo, t_lo, b.hi, lsl t_c1
1293 // sub t_c2, c.lo, #32
1294 // orr t_lo, t_lo, b.hi, lsr t_c2
1295 // lsr t_hi, b.hi, c.lo
1296 // a.lo = t_lo
1297 // a.hi = t_hi
1298 case InstArithmetic::Ashr: {
1299 // a=b>>c (signed) ==> ...
1300 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
1301 // and the next orr should be conditioned on PLUS. The last two
1302 // right shifts should also be arithmetic.
1303 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1304 Variable *T_Lo = makeReg(IceType_i32);
1305 Variable *Src1RLo = legalizeToVar(Src1Lo);
1306 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1307 Variable *T_C1 = makeReg(IceType_i32);
1308 Variable *T_C2 = makeReg(IceType_i32);
1309 _rsb(T_C1, Src1RLo, ThirtyTwo);
1310 _lsr(T_Lo, Src0RLo, Src1RLo);
1311 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1312 OperandARM32::LSL, T_C1));
1313 OperandARM32::ShiftKind RShiftKind;
1314 CondARM32::Cond Pred;
1315 if (IsAshr) {
1316 _subs(T_C2, Src1RLo, ThirtyTwo);
1317 RShiftKind = OperandARM32::ASR;
1318 Pred = CondARM32::PL;
1319 } else {
1320 _sub(T_C2, Src1RLo, ThirtyTwo);
1321 RShiftKind = OperandARM32::LSR;
1322 Pred = CondARM32::AL;
1323 }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      return;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      return;
    }
    return;
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // Dest->getType() is a non-i64 scalar.
  Variable *Src0R = legalizeToVar(Src0);
  Variable *T = makeReg(Dest->getType());
  // Handle div/rem separately. They require a non-legalized Src1 to inspect
  // whether or not Src1 is a non-zero constant. Once legalized it is more
  // difficult to determine (constant may be moved to a register).
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Udiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_udiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Sdiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_sdiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Urem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_urem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Srem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_srem_i32, IsRemainder);
    return;
  }
  }

  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add:
    _add(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::And:
    _and(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    _orr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub:
    _sub(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Mul: {
    Variable *Src1R = legalizeToVar(Src1RF);
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl:
    _lsl(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Lshr:
    _lsr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Ashr:
    _asr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm_unreachable("Integer div/rem should have been handled earlier.");
    return;
  case InstArithmetic::Fadd:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fsub:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fmul:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fdiv:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Frem:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *SrcR;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the
      // Src operand into a Variable with the same register
      // assignment. This is mostly a workaround for advanced phi
      // lowering's ad-hoc register allocation which assumes no
      // register allocation is needed when at least one of the
      // operands is non-memory.
      // TODO(jvoung): check this for ARM.
      SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need
      // to do a Store (and store can only have Register operands),
      // legalize this to a register.
      SrcR = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _mov(Dest, SrcR);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.
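  // As a sketch (register names illustrative), "br i1 %c, label %t, label %f"
  // becomes roughly:
  //   cmp r_c, #0
  //   bne <t>
  //   b   <f>
  // where one of the branches may turn into a fallthrough.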

  Variable *Src0R = legalizeToVar(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;
  NeedsStackAlignment = true;

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  int32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
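  // For example (illustrative, assuming the AAPCS-like convention modeled by
  // CallingConv, with r0-r3 holding the leading GPR arguments and an i64
  // occupying an even/odd register pair): for f(i32 a, i64 b, i32 c), a
  // lands in r0, b in r2/r3, and c spills to the parameter area on the
  // stack.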
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call sp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0->getType() == IceType_i32) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else if (Src0->getType() == IceType_i1) {
        Variable *Src0R = legalizeToVar(Src0);
        _lsl(T_Lo, Src0R, ShiftAmt);
        _asr(T_Lo, T_Lo, ShiftAmt);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _sxt(T_Lo, Src0R);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
      if (Src0->getType() != IceType_i1) {
        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
                                               OperandARM32::ASR, ShiftAmt));
      } else {
        // For i1, the asr instruction is already done above.
        _mov(T_Hi, T_Lo);
      }
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
      //   lsl t1, src_reg, 31
      //   asr t1, t1, 31
      //   dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *T = makeReg(Dest->getType());
      _lsl(T, Src0R, ShiftAmt);
      _asr(T, T, ShiftAmt);
      _mov(Dest, T);
    } else {
      // t1 = sxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _sxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=uxtb src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // i32 and i1 can just take up the whole register.
      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _uxt(T_Lo, Src0R);
      }
      if (Src0->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(T_Lo, T_Lo, One);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestLo->getType());
      _mov(T_Hi, Zero);
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // t = Src0; t &= 1; Dest = t
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      Constant *One = Ctx->getConstantInt32(1);
      Variable *T = makeReg(Dest->getType());
      // Just use _mov instead of _uxt since all registers are 32-bit.
      // _uxt requires the source to be a register so could have required
      // a _mov from legalize anyway.
      _mov(T, Src0RF);
      _and(T, T, One);
      _mov(Dest, T);
    } else {
      // t1 = uxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _uxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      Operand *Src0 = Inst->getSrc(0);
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      // t1 = trunc Src0RF; Dest = t1
      Variable *T = makeReg(Dest->getType());
      _mov(T, Src0RF);
      if (Dest->getType() == IceType_i1)
        _and(T, T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  }
}

void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);

  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo
  //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1              mov.<C1> t, #1
  //   mov.<C2> t, #0              mov.<C2> t, #0
  //   mov      a, t               mov      a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped
  // as well.
  //
  // LLVM does:
  // for EQ and NE:
  //   eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
  //   orrs t, t1, t2
  //   mov.<C> t, #1
  //   mov a, t
  //
  // that's nice in that it's just as short but has fewer dependencies
  // for better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
  // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
  // but is a longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Variable *Src0Lo, *Src0Hi;
    Operand *Src1LoRF, *Src1HiRF;
    if (TableIcmp64[Index].Swapped) {
      Src0Lo = legalizeToVar(loOperand(Src1));
      Src0Hi = legalizeToVar(hiOperand(Src1));
      Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    } else {
      Src0Lo = legalizeToVar(loOperand(Src0));
      Src0Hi = legalizeToVar(hiOperand(Src0));
      Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    }
    Variable *T = makeReg(IceType_i32);
    if (TableIcmp64[Index].IsSigned) {
      Variable *ScratchReg = makeReg(IceType_i32);
      _cmp(Src0Lo, Src1LoRF);
      _sbcs(ScratchReg, Src0Hi, Src1HiRF);
      // ScratchReg isn't going to be used, but we need the
      // side-effect of setting flags from this operation.
      Context.insert(InstFakeUse::create(Func, ScratchReg));
    } else {
      _cmp(Src0Hi, Src1HiRF);
      _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
    }
    _mov(T, One, TableIcmp64[Index].C1);
    _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
    _mov(Dest, T);
    return;
  }

  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp      tb, tc
  //   mov.C1   t, #0
  //   mov.C2   t, #1
  //   mov      a, t
  // where the unsigned/sign extension is not needed for 32-bit.
  // They also have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs     t, tb, tc
  //   movne    t, #1
  //   mov      a, t
  //
  // LLVM does:
  //   lsl     tb, b, #<N>
  //   mov     t, #0
  //   cmp     tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov     a, t
  //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, for some reason it does something similar to GCC
  // and performs a uxtb first. It's not clear to me why that special-casing
  // is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt >= 0);
  Constant *ShiftConst = nullptr;
  Variable *Src0R = nullptr;
  Variable *T = makeReg(IceType_i32);
  if (ShiftAmt) {
    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
    Src0R = makeReg(IceType_i32);
    _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
  } else {
    Src0R = legalizeToVar(Src0);
  }
  _mov(T, Zero);
  if (ShiftAmt) {
    Variable *Src1R = legalizeToVar(Src1);
    OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
    _cmp(Src0R, Src1RShifted);
  } else {
    Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
    _cmp(Src0R, Src1RF);
  }
  _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicFence:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstARM32Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicIsLockFree: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicLoad: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicRMW:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicStore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Bswap: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Ctpop: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Ctlz: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Cttz: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Fabs: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because the PNaCl ABI requires arguments to be at least 32 bits
    // wide.
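    // E.g., a memset value of 0xAB arrives as i8 and is zero-extended into
    // a 32-bit temporary before being passed to the helper (value here is
    // illustrative).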
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Stacksave: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Stackrestore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Trap:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

void TargetARM32::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandARM32Mem operand.
  Type Ty = Load->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  Variable *DestLoad = Load->getDest();

  // TODO(jvoung): handle folding opportunities. Sign and zero extension
  // can be folded into a load.
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

void TargetARM32::doAddressOptLoad() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    UnimplementedError(Func->getContext()->getFlags());
  }
}

void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetARM32::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = Inst->getRetValue();
    if (Src0->getType() == IceType_i64) {
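      // The i64 return value travels in r0 (low word) and r1 (high word).
      // Only R0 is handed to the ret below, so the FakeUse keeps R1 live
      // across it.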
      Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert(InstFakeUse::create(Func, R1));
    } else if (isScalarFloatingType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isVectorType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  // addEpilog is responsible for restoring the "lr" register as needed
  // prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
  // Add a fake use of sp to make sure sp stays alive for the entire
  // function. Otherwise post-call sp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert(InstFakeUse::create(Func, SP));
}

void TargetARM32::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  if (isFloatingType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // TODO(jvoung): handle folding opportunities.
  // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
  Variable *CmpOpnd0 = legalizeToVar(Condition);
  Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpOpnd0, CmpOpnd1);
  CondARM32::Cond Cond = CondARM32::NE;
  if (DestTy == IceType_i64) {
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }
  Variable *T = nullptr;
  SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
  _mov_nonkillable(T, SrcT, Cond);
  _mov(Dest, T);
}

void TargetARM32::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Variable *ValueHi = legalizeToVar(hiOperand(Value));
    Variable *ValueLo = legalizeToVar(loOperand(Value));
    _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
    _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    Variable *ValueR = legalizeToVar(Value);
    _str(ValueR, NewAddr);
  }
}

void TargetARM32::doAddressOptStore() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    // TODO(jvoung): handle and test undef for Src0
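    // Each case compares the low words first; the high-word compare is
    // predicated on EQ, so the final EQ condition holds only when both
    // halves match. Roughly (operands illustrative, after legalization):
    //   cmp   lo, <val.lo>
    //   cmpeq hi, <val.hi>
    //   beq   <label[I]>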
    Variable *Src0Lo = legalizeToVar(loOperand(Src0));
    Variable *Src0Hi = legalizeToVar(hiOperand(Src0));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
      ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
      ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
      _cmp(Src0Lo, ValueLo);
      _cmp(Src0Hi, ValueHi, CondARM32::EQ);
      _br(Inst->getLabel(I), CondARM32::EQ);
    }
    _br(Inst->getLabelDefault());
    return;
  }

  // 32 bit integer
  Variable *Src0Var = legalizeToVar(Src0);
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
    Value = legalize(Value, Legal_Reg | Legal_Flex);
    _cmp(Src0Var, Value);
    _br(Inst->getLabel(I), CondARM32::EQ);
  }
  _br(Inst->getLabelDefault());
}

void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  _trap();
}

// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
// preserve integrity of liveness analysis. Undef values are also
// turned into zeroes, since loOperand() and hiOperand() don't expect
// Undef input.
void TargetARM32::prelowerPhis() {
  UnimplementedError(Func->getContext()->getFlags());
}

// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
void TargetARM32::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  (void)Node;
  (void)Assignments;
  UnimplementedError(Func->getContext()->getFlags());
}

Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  UnimplementedError(Func->getContext()->getFlags());
  return Reg;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    // Mov's Src operand can really only be the flexible second operand type
    // or a register. Users should guarantee that.
    _mov(Reg, Src);
  }
  return Reg;
}

Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);
  // Go through the various types of operands:
  // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (Mem->isRegReg()) {
        Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase,
                                      Mem->getOffset(), Mem->getAddrMode());
      }
    }
    if (!(Allowed & Legal_Mem)) {
      Type Ty = Mem->getType();
      Variable *Reg = makeReg(Ty, RegNum);
      _ldr(Reg, Mem);
      From = Reg;
    } else {
      From = Mem;
    }
    return From;
  }

  if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below,
          // where it may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero. Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      if (isVectorType(From->getType()))
        return makeVectorOfZeros(From->getType(), RegNum);
      From = Ctx->getConstantZero(From->getType());
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(From->getType()));
    bool CanBeFlex = Allowed & Legal_Flex;
    if (auto C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      // Check if the immediate will fit in a Flexible second operand,
      // if a Flexible second operand is allowed. We need to know the exact
      // value, so that rules out relocatable constants.
      // Also try the inverse and use MVN if possible.
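      // (A flexible immediate is an 8-bit value rotated right by an even
      // amount in [0, 30]. E.g., 0xFF is encodable directly, 0xFFFFFF00 is
      // not, but its bitwise inverse 0xFF is, so the latter can be
      // materialized as "mvn reg, #0xFF"; values illustrative.)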
      if (CanBeFlex &&
          OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        return OperandARM32FlexImm::create(Func, From->getType(), Immed_8,
                                           RotateAmt);
      } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
                                  ~Value, &RotateAmt, &Immed_8)) {
        auto InvertedFlex = OperandARM32FlexImm::create(Func, From->getType(),
                                                        Immed_8, RotateAmt);
        Type Ty = From->getType();
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvertedFlex);
        return Reg;
      } else {
        // Do a movw/movt to a register.
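        // E.g., materializing 0x12345678 (illustrative):
        //   movw reg, #0x5678  @ writes the low half, zeroes the high half
        //   movt reg, #0x1234  @ writes the high half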
        Type Ty = From->getType();
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Type Ty = From->getType();
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      // Load floats/doubles from literal pool.
      UnimplementedError(Func->getContext()->getFlags());
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");

  return From;
}

// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // It may be the case that address mode optimization already creates
  // an OperandARM32Mem, so in that case it wouldn't need another level
  // of transformation.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only
  // have a base/offset to work with. ARM always requires a base
  // register, so just use that to hold the operand.
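  // The operand is thus materialized into a base register and wrapped in a
  // [base, #0] addressing form.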
  Variable *Base = legalizeToVar(Operand);
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}

Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits
  // (if it fits at all). Assume Align is usually small, in which case BIC
  // works better. Thus, this rounds down to the alignment.
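  // E.g., with Align == 16 (illustrative): Align - 1 == 15 is encodable, so
  // "bic reg, reg, #15" clears the low four bits. When Align - 1 does not
  // fit a flexible immediate, -Align is legalized (into a register if
  // necessary) and applied with AND instead.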
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
    _and(Reg, Reg, Mask);
  }
}

void TargetARM32::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

void TargetDataARM32::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  UnimplementedError(Ctx->getFlags());
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE.
  // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope
  // sub-subsection of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but do A9 for higher compat.
  // For some reason, the LLVM ARM asm parser has the .cpu directive override
  // the mattr specified on the commandline. So to test hwdiv, we need to set
  // the .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

} // end of namespace Ice