//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringARM32 class, which consists almost
// entirely of the lowering sequence for each high-level instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/MathExtras.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceTargetLoweringARM32.h"
#include "IceUtils.h"

namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, which gives better
    // stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
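// (Illustrative example: an unsigned less-than would typically map to the ARM
// LO/CC condition; the concrete mapping is supplied by the ICMPARM32_TABLE
// x-macro in IceTargetLoweringARM32.def.)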

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
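// (Illustrative only; the exact per-condition choices come from
// ICMPARM32_TABLE: an unsigned compare can subtract the low words and then
// the high words with carry, while a signed compare looks at the high words
// first.)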
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// The maximum number of arguments to pass in GPR registers.
const uint32_t ARM32_MAX_GPR_ARG = 4;

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
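// For example, with the 16-byte stack alignment used here, a Value of 20 is
// rounded up to 32, and a Value of 16 is returned unchanged.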
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), InstructionSet(ARM32InstructionSet::Begin),
      UsesFramePointer(false), NeedsStackAlignment(false), MaybeLeafFunc(true),
      SpillAreaSizeBytes(0) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  FloatRegisters[RegARM32::val] = isFP;                                        \
  VectorRegisters[RegARM32::val] = isFP;                                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as an "argument" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  const bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first few integer type parameters can use r0-r3, regardless of their
  // position relative to the floating-point/vector arguments in the argument
  // list. Floating-point and vector arguments can use q0-q3 (aka d0-d7,
  // s0-s15).
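  // As an illustration of the even-register rule handled below: for arguments
  // (i32, i64, i32), the first i32 takes r0, the i64 takes the even pair
  // r2:r3 (padding away r1), and the trailing i32 no longer fits in r0-r3,
  // so it is left to be passed on the stack.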
  unsigned NumGPRRegsUsed = 0;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
        continue;
      int32_t RegLo;
      int32_t RegHi;
      // Always start i64 registers at an even register, so this may end
      // up padding away a register.
      if (NumGPRRegsUsed % 2 != 0) {
        ++NumGPRRegsUsed;
      }
      RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
      ++NumGPRRegsUsed;
      RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
      ++NumGPRRegsUsed;
      // If this bumps us past the boundary, don't allocate to a register
      // and leave any previously speculatively consumed registers as consumed.
      if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegLo);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegHi);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
        continue;
      int32_t RegNum = RegARM32::Reg_r0 + NumGPRRegsUsed;
      ++NumGPRRegsUsed;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 6
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
  // Unlike x86, ARM also has callee-saved float/vector registers.
  // The "vpush" instruction can handle a whole list of float/vector
  // registers, but it only handles contiguous sequences of registers
  // by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point
      // register segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
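    // For example, with 8 bytes of preserved registers and a raw spill area
    // of 20 bytes, the 16-byte-aligned total is 32, so SpillAreaSizeBytes
    // becomes 24 (illustrative numbers only).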
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the IP inter-procedural scratch register if needed to legalize
    // the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumGPRArgs = 0;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64 && NumGPRArgs < ARM32_MAX_GPR_ARG) {
      // Start at an even register.
      if (NumGPRArgs % 2 == 1) {
        ++NumGPRArgs;
      }
      NumGPRArgs += 2;
      if (NumGPRArgs <= ARM32_MAX_GPR_ARG)
        continue;
    } else if (NumGPRArgs < ARM32_MAX_GPR_ARG) {
      ++NumGPRArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the IP inter-procedural scratch register if needed to legalize
      // the immediate. It shouldn't be live at this point.
      Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                    Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
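  // Clearing the bits in 0xc000000f keeps the return target 16-byte
  // bundle-aligned (low four bits) and below the 1GB boundary (top two bits).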
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (ALLOW_DUMP) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
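      // For example, if the low word is addressed as [Base, #8], the high
      // word lives at [Base, #12], provided the new offset is still encodable
      // (checked below).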
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing instead.
        // Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
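    // For example, a constant request of 20 bytes with 16-byte alignment
    // reserves 32 bytes.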
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    TotalSize = legalize(TotalSize);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
  // to legalize Src0 to flex or Src1 to flex and there is a reversible
  // instruction. E.g., reverse subtract with immediate, register vs
  // register, immediate.
  // Or it may be the case that the operands aren't swapped, but the
  // bits can be flipped and a different operation applied.
  // E.g., use BIC (bit clear) instead of AND for some masks.
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *Src0RLo = legalizeToVar(loOperand(Src0));
    Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
    Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
    Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    Variable *T_Lo = makeReg(DestLo->getType());
    Variable *T_Hi = makeReg(DestHi->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      _adds(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _adc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      _and(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _and(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _orr(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _orr(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _eor(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _eor(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      _subs(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _sbc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      // GCC 4.8 does:
      // a=b*c ==>
      // t_acc =(mul) (b.lo * c.hi)
      // t_acc =(mla) (c.lo * b.hi) + t_acc
      // t.hi,t.lo =(umull) b.lo * c.lo
      // t.hi += t_acc
      // a.lo = t.lo
      // a.hi = t.hi
      //
      // LLVM does:
      // t.hi,t.lo =(umull) b.lo * c.lo
      // t.hi =(mla) (b.lo * c.hi) + t.hi
      // t.hi =(mla) (b.hi * c.lo) + t.hi
      // a.lo = t.lo
      // a.hi = t.hi
      //
      // LLVM's lowering has fewer instructions, but more register pressure:
      // t.lo is live from beginning to end, while GCC delays the two-dest
      // instruction till the end, and kills c.hi immediately.
      Variable *T_Acc = makeReg(IceType_i32);
      Variable *T_Acc1 = makeReg(IceType_i32);
      Variable *T_Hi1 = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Variable *Src1RHi = legalizeToVar(Src1Hi);
      _mul(T_Acc, Src0RLo, Src1RHi);
      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
      _add(T_Hi, T_Hi1, T_Acc1);
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
    } break;
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      // GCC 4.8 does:
      // sub t_c1, c.lo, #32
      // lsl t_hi, b.hi, c.lo
      // orr t_hi, t_hi, b.lo, lsl t_c1
      // rsb t_c2, c.lo, #32
      // orr t_hi, t_hi, b.lo, lsr t_c2
      // lsl t_lo, b.lo, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
      // Can be strength-reduced for constant-shifts, but we don't do
      // that for now.
      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
      // On ARM, shifts only take the lower 8 bits of the shift register,
      // and saturate to the range 0-32, so the negative value will
      // saturate to 32.
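      // (As an illustration of the strength reduction mentioned above: a
      // constant shift by exactly 32 would reduce to a.hi = b.lo, a.lo = 0.)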
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
      // that maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
    } break;
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      // rsb t_c1, c.lo, #32
      // lsr t_lo, b.lo, c.lo
      // orr t_lo, t_lo, b.hi, lsl t_c1
      // sub t_c2, c.lo, #32
      // orr t_lo, t_lo, b.hi, lsr t_c2
      // lsr t_hi, b.hi, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
      // and the next orr should be conditioned on PLUS. The last two
      // right shifts should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
    } break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
  } else { // Dest->getType() is non-i64 scalar
    Variable *Src0R = legalizeToVar(Inst->getSrc(0));
    Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
    Variable *T = makeReg(Dest->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      _add(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::And: {
      _and(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Or: {
      _orr(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      _eor(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      _sub(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      Variable *Src1R = legalizeToVar(Src1);
      _mul(T, Src0R, Src1R);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Shl:
      _lsl(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _lsr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _asr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Sdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Urem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fsub:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fmul:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Frem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *SrcR;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the
      // Src operand into a Variable with the same register
      // assignment. This is mostly a workaround for advanced phi
      // lowering's ad-hoc register allocation which assumes no
      // register allocation is needed when at least one of the
      // operands is non-memory.
      // TODO(jvoung): check this for ARM.
      SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need
      // to do a Store (and store can only have Register operands),
      // legalize this to a register.
      SrcR = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _mov(Dest, SrcR);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.

  Variable *Src0R = legalizeToVar(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;

  // TODO(jvoung): assign arguments to registers and stack. Also reserve stack.
  if (Instr->getNumArgs()) {
    UnimplementedError(Func->getContext()->getFlags());
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
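      // For example, sign-extending the i8 value 0x80 produces
      // dst.lo = 0xFFFFFF80 and dst.hi = 0xFFFFFFFF (illustrative values).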
1422 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1423 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1424 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1425 Variable *T_Lo = makeReg(DestLo->getType());
1426 if (Src0->getType() == IceType_i32) {
1427 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1428 _mov(T_Lo, Src0RF);
1429 } else if (Src0->getType() == IceType_i1) {
1430 Variable *Src0R = legalizeToVar(Src0);
1431 _lsl(T_Lo, Src0R, ShiftAmt);
1432 _asr(T_Lo, T_Lo, ShiftAmt);
1433 } else {
1434 Variable *Src0R = legalizeToVar(Src0);
1435 _sxt(T_Lo, Src0R);
1436 }
1437 _mov(DestLo, T_Lo);
1438 Variable *T_Hi = makeReg(DestHi->getType());
1439 if (Src0->getType() != IceType_i1) {
1440 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
1441 OperandARM32::ASR, ShiftAmt));
1442 } else {
1443 // For i1, the asr instruction is already done above.
1444 _mov(T_Hi, T_Lo);
1445 }
1446 _mov(DestHi, T_Hi);
1447 } else if (Src0->getType() == IceType_i1) {
1448 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
1449 // lsl t1, src_reg, 31
1450 // asr t1, t1, 31
1451 // dst = t1
1452 Variable *Src0R = legalizeToVar(Src0);
1453 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1454 Variable *T = makeReg(Dest->getType());
1455 _lsl(T, Src0R, ShiftAmt);
1456 _asr(T, T, ShiftAmt);
1457 _mov(Dest, T);
1458 } else {
1459 // t1 = sxt src; dst = t1
1460 Variable *Src0R = legalizeToVar(Src0);
1461 Variable *T = makeReg(Dest->getType());
1462 _sxt(T, Src0R);
1463 _mov(Dest, T);
1464 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001465 break;
1466 }
1467 case InstCast::Zext: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001468 if (isVectorType(Dest->getType())) {
1469 UnimplementedError(Func->getContext()->getFlags());
1470 } else if (Dest->getType() == IceType_i64) {
1471 // t1=uxtb src; dst.lo=t1; dst.hi=0
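      // For example, zero-extending the i8 value 0x80 to i64 yields
      // dst.lo = 0x00000080 and dst.hi = 0x00000000.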
1472 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1473 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1474 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1475 Variable *T_Lo = makeReg(DestLo->getType());
1476 // i32 and i1 can just take up the whole register.
1477 // i32 doesn't need uxt, while i1 will have an and mask later anyway.
1478 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
1479 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1480 _mov(T_Lo, Src0RF);
1481 } else {
1482 Variable *Src0R = legalizeToVar(Src0);
1483 _uxt(T_Lo, Src0R);
1484 }
1485 if (Src0->getType() == IceType_i1) {
1486 Constant *One = Ctx->getConstantInt32(1);
1487 _and(T_Lo, T_Lo, One);
1488 }
1489 _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
1491 _mov(T_Hi, Zero);
1492 _mov(DestHi, T_Hi);
1493 } else if (Src0->getType() == IceType_i1) {
1494 // t = Src0; t &= 1; Dest = t
1495 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1496 Constant *One = Ctx->getConstantInt32(1);
1497 Variable *T = makeReg(Dest->getType());
1498 // Just use _mov instead of _uxt since all registers are 32-bit.
1499 // _uxt requires the source to be a register so could have required
1500 // a _mov from legalize anyway.
1501 _mov(T, Src0RF);
1502 _and(T, T, One);
1503 _mov(Dest, T);
1504 } else {
1505 // t1 = uxt src; dst = t1
1506 Variable *Src0R = legalizeToVar(Src0);
1507 Variable *T = makeReg(Dest->getType());
1508 _uxt(T, Src0R);
1509 _mov(Dest, T);
1510 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001511 break;
1512 }
1513 case InstCast::Trunc: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001514 if (isVectorType(Dest->getType())) {
1515 UnimplementedError(Func->getContext()->getFlags());
1516 } else {
1517 Operand *Src0 = Inst->getSrc(0);
1518 if (Src0->getType() == IceType_i64)
1519 Src0 = loOperand(Src0);
1520 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1521 // t1 = trunc Src0RF; Dest = t1
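      // On ARM32 a plain mov suffices here: narrower integer types occupy
      // full 32-bit registers, and only i1 needs an explicit mask afterwards.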
1522 Variable *T = makeReg(Dest->getType());
1523 _mov(T, Src0RF);
1524 if (Dest->getType() == IceType_i1)
1525 _and(T, T, Ctx->getConstantInt1(1));
1526 _mov(Dest, T);
1527 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001528 break;
1529 }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
1550 case InstCast::Bitcast: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001551 Operand *Src0 = Inst->getSrc(0);
1552 if (Dest->getType() == Src0->getType()) {
1553 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
1554 lowerAssign(Assign);
1555 return;
1556 }
Jan Voungb2d50842015-05-12 09:53:50 -07001557 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001558 break;
1559 }
1560 }
1561}
1562
1563void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
1564 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001565 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001566}
1567
1568void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
1569 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001570 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001571}
1572
1573void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001574 Variable *Dest = Inst->getDest();
1575 Operand *Src0 = Inst->getSrc(0);
1576 Operand *Src1 = Inst->getSrc(1);
1577
1578 if (isVectorType(Dest->getType())) {
1579 UnimplementedError(Func->getContext()->getFlags());
1580 return;
1581 }
1582
1583 // a=icmp cond, b, c ==>
1584 // GCC does:
1585 // cmp b.hi, c.hi or cmp b.lo, c.lo
1586 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
1587 // mov.<C1> t, #1 mov.<C1> t, #1
1588 // mov.<C2> t, #0 mov.<C2> t, #0
1589 // mov a, t mov a, t
  // where "cmp.eq b.lo, c.lo" is used for unsigned compares and
  // "sbcs t1, b.hi, c.hi" for signed compares. In some cases, b and c
  // also need to be swapped.
1593 //
1594 // LLVM does:
1595 // for EQ and NE:
1596 // eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
1598 // orrs t, t1, t2
1599 // mov.<C> t, #1
1600 // mov a, t
1601 //
  // That is just as short but has fewer dependencies (better ILP), at the
  // cost of more registers.
1604 //
1605 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
1606 // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
1608 // but is a longer sequence.
1609 //
1610 // So, we are going with the GCC version since it's usually better (except
1611 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
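  // For example, with the GCC-style sequence an unsigned i64 "a = b < c"
  // lowers roughly to:
  //   cmp   b.hi, c.hi
  //   cmpeq b.lo, c.lo
  //   movlo t, #1
  //   movhs t, #0
  //   mov   a, t
  // (condition codes taken from TableIcmp64).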
1612 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1613 Constant *One = Ctx->getConstantInt32(1);
1614 if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
1617 assert(Index < TableIcmp64Size);
1618 Variable *Src0Lo, *Src0Hi;
1619 Operand *Src1LoRF, *Src1HiRF;
1620 if (TableIcmp64[Index].Swapped) {
1621 Src0Lo = legalizeToVar(loOperand(Src1));
1622 Src0Hi = legalizeToVar(hiOperand(Src1));
1623 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1624 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1625 } else {
1626 Src0Lo = legalizeToVar(loOperand(Src0));
1627 Src0Hi = legalizeToVar(hiOperand(Src0));
1628 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1629 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1630 }
1631 Variable *T = makeReg(IceType_i32);
1632 if (TableIcmp64[Index].IsSigned) {
1633 Variable *ScratchReg = makeReg(IceType_i32);
1634 _cmp(Src0Lo, Src1LoRF);
1635 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
1636 // ScratchReg isn't going to be used, but we need the
1637 // side-effect of setting flags from this operation.
1638 Context.insert(InstFakeUse::create(Func, ScratchReg));
1639 } else {
1640 _cmp(Src0Hi, Src1HiRF);
1641 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
1642 }
1643 _mov(T, One, TableIcmp64[Index].C1);
1644 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
1645 _mov(Dest, T);
1646 return;
1647 }
1648
1649 // a=icmp cond b, c ==>
1650 // GCC does:
1651 // <u/s>xtb tb, b
1652 // <u/s>xtb tc, c
1653 // cmp tb, tc
1654 // mov.C1 t, #0
1655 // mov.C2 t, #1
1656 // mov a, t
1657 // where the unsigned/sign extension is not needed for 32-bit.
1658 // They also have special cases for EQ and NE. E.g., for NE:
1659 // <extend to tb, tc>
1660 // subs t, tb, tc
1661 // movne t, #1
1662 // mov a, t
1663 //
1664 // LLVM does:
1665 // lsl tb, b, #<N>
1666 // mov t, #0
1667 // cmp tb, c, lsl #<N>
1668 // mov.<C> t, #1
1669 // mov a, t
1670 //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, LLVM does something similar to GCC and emits a
  // uxtb first; it is not clear why that special-casing is needed.
1675 //
1676 // We'll go with the LLVM way for now, since it's shorter and has just as
1677 // few dependencies.
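  // For example, an i16 compare uses ShiftAmt == 16, giving:
  //   lsl tb, b, #16
  //   cmp tb, c, lsl #16
  // so that only the low 16 bits of b and c participate in the compare.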
Jan Voung66c3d5e2015-06-04 17:02:31 -07001678 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
1679 assert(ShiftAmt >= 0);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001680 Constant *ShiftConst = nullptr;
1681 Variable *Src0R = nullptr;
1682 Variable *T = makeReg(IceType_i32);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001683 if (ShiftAmt) {
1684 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001685 Src0R = makeReg(IceType_i32);
1686 _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
1687 } else {
1688 Src0R = legalizeToVar(Src0);
1689 }
1690 _mov(T, Zero);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001691 if (ShiftAmt) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001692 Variable *Src1R = legalizeToVar(Src1);
1693 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
1694 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
1695 _cmp(Src0R, Src1RShifted);
1696 } else {
1697 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1698 _cmp(Src0R, Src1RF);
1699 }
1700 _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
1701 _mov(Dest, T);
1702 return;
Jan Voungb36ad9b2015-04-21 17:01:49 -07001703}
1704
1705void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
1706 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001707 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001708}
1709
1710void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
1711 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
1712 case Intrinsics::AtomicCmpxchg: {
Jan Voungb2d50842015-05-12 09:53:50 -07001713 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001714 return;
1715 }
1716 case Intrinsics::AtomicFence:
Jan Voungb2d50842015-05-12 09:53:50 -07001717 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001718 return;
1719 case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
1721 // across the fence (both atomic and non-atomic). The InstARM32Mfence
1722 // instruction is currently marked coarsely as "HasSideEffects".
Jan Voungb2d50842015-05-12 09:53:50 -07001723 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001724 return;
1725 case Intrinsics::AtomicIsLockFree: {
Jan Voungb2d50842015-05-12 09:53:50 -07001726 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001727 return;
1728 }
1729 case Intrinsics::AtomicLoad: {
Jan Voungb2d50842015-05-12 09:53:50 -07001730 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001731 return;
1732 }
1733 case Intrinsics::AtomicRMW:
Jan Voungb2d50842015-05-12 09:53:50 -07001734 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001735 return;
1736 case Intrinsics::AtomicStore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001737 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001738 return;
1739 }
1740 case Intrinsics::Bswap: {
Jan Voungb2d50842015-05-12 09:53:50 -07001741 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001742 return;
1743 }
1744 case Intrinsics::Ctpop: {
Jan Voungb2d50842015-05-12 09:53:50 -07001745 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001746 return;
1747 }
1748 case Intrinsics::Ctlz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001749 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001750 return;
1751 }
1752 case Intrinsics::Cttz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001753 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001754 return;
1755 }
1756 case Intrinsics::Fabs: {
Jan Voungb2d50842015-05-12 09:53:50 -07001757 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001758 return;
1759 }
1760 case Intrinsics::Longjmp: {
1761 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
1762 Call->addArg(Instr->getArg(0));
1763 Call->addArg(Instr->getArg(1));
1764 lowerCall(Call);
1765 return;
1766 }
1767 case Intrinsics::Memcpy: {
1768 // In the future, we could potentially emit an inline memcpy/memset, etc.
1769 // for intrinsic calls w/ a known length.
1770 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
1771 Call->addArg(Instr->getArg(0));
1772 Call->addArg(Instr->getArg(1));
1773 Call->addArg(Instr->getArg(2));
1774 lowerCall(Call);
1775 return;
1776 }
1777 case Intrinsics::Memmove: {
1778 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
1779 Call->addArg(Instr->getArg(0));
1780 Call->addArg(Instr->getArg(1));
1781 Call->addArg(Instr->getArg(2));
1782 lowerCall(Call);
1783 return;
1784 }
1785 case Intrinsics::Memset: {
1786 // The value operand needs to be extended to a stack slot size
1787 // because the PNaCl ABI requires arguments to be at least 32 bits
1788 // wide.
1789 Operand *ValOp = Instr->getArg(1);
1790 assert(ValOp->getType() == IceType_i8);
1791 Variable *ValExt = Func->makeVariable(stackSlotType());
1792 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
1793 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
1794 Call->addArg(Instr->getArg(0));
1795 Call->addArg(ValExt);
1796 Call->addArg(Instr->getArg(2));
1797 lowerCall(Call);
1798 return;
1799 }
1800 case Intrinsics::NaClReadTP: {
1801 if (Ctx->getFlags().getUseSandboxing()) {
Jan Voungb2d50842015-05-12 09:53:50 -07001802 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001803 } else {
1804 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
1805 lowerCall(Call);
1806 }
1807 return;
1808 }
1809 case Intrinsics::Setjmp: {
1810 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
1811 Call->addArg(Instr->getArg(0));
1812 lowerCall(Call);
1813 return;
1814 }
1815 case Intrinsics::Sqrt: {
Jan Voungb2d50842015-05-12 09:53:50 -07001816 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001817 return;
1818 }
1819 case Intrinsics::Stacksave: {
Jan Voungb2d50842015-05-12 09:53:50 -07001820 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001821 return;
1822 }
1823 case Intrinsics::Stackrestore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001824 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001825 return;
1826 }
1827 case Intrinsics::Trap:
Jan Voungb2d50842015-05-12 09:53:50 -07001828 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001829 return;
1830 case Intrinsics::UnknownIntrinsic:
1831 Func->setError("Should not be lowering UnknownIntrinsic");
1832 return;
1833 }
1834 return;
1835}
1836
Jan Voungbefd03a2015-06-02 11:03:03 -07001837void TargetARM32::lowerLoad(const InstLoad *Load) {
1838 // A Load instruction can be treated the same as an Assign
1839 // instruction, after the source operand is transformed into an
1840 // OperandARM32Mem operand.
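  // For example, a load of i32 from pointer p becomes an assignment whose
  // source is a memory operand of the form [base, #0], with p legalized
  // into the base register by formMemoryOperand().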
1841 Type Ty = Load->getDest()->getType();
1842 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
1843 Variable *DestLoad = Load->getDest();
1844
  // TODO(jvoung): handle folding opportunities. Sign and zero extension
1846 // can be folded into a load.
1847 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
1848 lowerAssign(Assign);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001849}
1850
1851void TargetARM32::doAddressOptLoad() {
Jan Voungb2d50842015-05-12 09:53:50 -07001852 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001853}
1854
1855void TargetARM32::randomlyInsertNop(float Probability) {
1856 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
1857 if (RNG.getTrueWithProbability(Probability)) {
Jan Voungb2d50842015-05-12 09:53:50 -07001858 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001859 }
1860}
1861
1862void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
1863 Func->setError("Phi found in regular instruction list");
1864}
1865
1866void TargetARM32::lowerRet(const InstRet *Inst) {
Jan Voungb2d50842015-05-12 09:53:50 -07001867 Variable *Reg = nullptr;
1868 if (Inst->hasRetValue()) {
Jan Voungb3401d22015-05-18 09:38:21 -07001869 Operand *Src0 = Inst->getRetValue();
1870 if (Src0->getType() == IceType_i64) {
1871 Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
1872 Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
1873 Reg = R0;
1874 Context.insert(InstFakeUse::create(Func, R1));
1875 } else if (isScalarFloatingType(Src0->getType())) {
1876 UnimplementedError(Func->getContext()->getFlags());
1877 } else if (isVectorType(Src0->getType())) {
1878 UnimplementedError(Func->getContext()->getFlags());
1879 } else {
1880 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001881 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
Jan Voungb3401d22015-05-18 09:38:21 -07001882 }
Jan Voungb2d50842015-05-12 09:53:50 -07001883 }
1884 // Add a ret instruction even if sandboxing is enabled, because
1885 // addEpilog explicitly looks for a ret instruction as a marker for
1886 // where to insert the frame removal instructions.
1887 // addEpilog is responsible for restoring the "lr" register as needed
1888 // prior to this ret instruction.
1889 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
1890 // Add a fake use of sp to make sure sp stays alive for the entire
1891 // function. Otherwise post-call sp adjustments get dead-code
1892 // eliminated. TODO: Are there more places where the fake use
1893 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
1894 // have a ret instruction.
1895 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
1896 Context.insert(InstFakeUse::create(Func, SP));
Jan Voungb36ad9b2015-04-21 17:01:49 -07001897}
1898
1899void TargetARM32::lowerSelect(const InstSelect *Inst) {
1900 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001901 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001902}
1903
1904void TargetARM32::lowerStore(const InstStore *Inst) {
Jan Voungbefd03a2015-06-02 11:03:03 -07001905 Operand *Value = Inst->getData();
1906 Operand *Addr = Inst->getAddr();
1907 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
1908 Type Ty = NewAddr->getType();
1909
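  // An i64 store is split into two 32-bit str instructions, one for each
  // half of the value; narrower scalar types use a single str from a
  // register.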
1910 if (Ty == IceType_i64) {
1911 Variable *ValueHi = legalizeToVar(hiOperand(Value));
1912 Variable *ValueLo = legalizeToVar(loOperand(Value));
1913 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
1914 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
1915 } else if (isVectorType(Ty)) {
1916 UnimplementedError(Func->getContext()->getFlags());
1917 } else {
1918 Variable *ValueR = legalizeToVar(Value);
1919 _str(ValueR, NewAddr);
1920 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001921}
1922
1923void TargetARM32::doAddressOptStore() {
Jan Voungb2d50842015-05-12 09:53:50 -07001924 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001925}
1926
1927void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
1928 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001929 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001930}
1931
1932void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
Jan Voungb3401d22015-05-18 09:38:21 -07001933 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001934}
1935
1936// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
1937// preserve integrity of liveness analysis. Undef values are also
1938// turned into zeroes, since loOperand() and hiOperand() don't expect
1939// Undef input.
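// For example, "a = phi i64 [b, %B1], [c, %B2]" would become
//   a.lo = phi i32 [b.lo, %B1], [c.lo, %B2]
//   a.hi = phi i32 [b.hi, %B1], [c.hi, %B2]
// (sketch only; the ARM32 version is not implemented yet).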
1940void TargetARM32::prelowerPhis() {
Jan Voungb2d50842015-05-12 09:53:50 -07001941 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001942}
1943
1944// Lower the pre-ordered list of assignments into mov instructions.
1945// Also has to do some ad-hoc register allocation as necessary.
1946void TargetARM32::lowerPhiAssignments(CfgNode *Node,
1947 const AssignList &Assignments) {
1948 (void)Node;
1949 (void)Assignments;
Jan Voungb2d50842015-05-12 09:53:50 -07001950 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001951}
1952
Jan Voungb3401d22015-05-18 09:38:21 -07001953Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
1954 Variable *Reg = makeReg(Ty, RegNum);
1955 UnimplementedError(Func->getContext()->getFlags());
1956 return Reg;
1957}
1958
1959// Helper for legalize() to emit the right code to lower an operand to a
1960// register of the appropriate type.
1961Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
1962 Type Ty = Src->getType();
1963 Variable *Reg = makeReg(Ty, RegNum);
1964 if (isVectorType(Ty)) {
1965 UnimplementedError(Func->getContext()->getFlags());
1966 } else {
1967 // Mov's Src operand can really only be the flexible second operand type
1968 // or a register. Users should guarantee that.
1969 _mov(Reg, Src);
1970 }
1971 return Reg;
1972}
1973
1974Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
1975 int32_t RegNum) {
1976 // Assert that a physical register is allowed. To date, all calls
1977 // to legalize() allow a physical register. Legal_Flex converts
1978 // registers to the right type OperandARM32FlexReg as needed.
1979 assert(Allowed & Legal_Reg);
1980 // Go through the various types of operands:
1981 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
1982 // Given the above assertion, if type of operand is not legal
1983 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
1984 // to a register.
1985 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
1986 // Before doing anything with a Mem operand, we need to ensure
1987 // that the Base and Index components are in physical registers.
1988 Variable *Base = Mem->getBase();
1989 Variable *Index = Mem->getIndex();
1990 Variable *RegBase = nullptr;
1991 Variable *RegIndex = nullptr;
1992 if (Base) {
1993 RegBase = legalizeToVar(Base);
1994 }
1995 if (Index) {
1996 RegIndex = legalizeToVar(Index);
1997 }
1998 // Create a new operand if there was a change.
1999 if (Base != RegBase || Index != RegIndex) {
2000 // There is only a reg +/- reg or reg + imm form.
2001 // Figure out which to re-create.
2002 if (Mem->isRegReg()) {
2003 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase, RegIndex,
2004 Mem->getShiftOp(), Mem->getShiftAmt(),
2005 Mem->getAddrMode());
2006 } else {
2007 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase,
2008 Mem->getOffset(), Mem->getAddrMode());
2009 }
2010 }
2011 if (!(Allowed & Legal_Mem)) {
2012 Type Ty = Mem->getType();
2013 Variable *Reg = makeReg(Ty, RegNum);
2014 _ldr(Reg, Mem);
2015 From = Reg;
2016 } else {
2017 From = Mem;
2018 }
2019 return From;
2020 }
2021
2022 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
2023 if (!(Allowed & Legal_Flex)) {
2024 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
2025 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
2026 From = FlexReg->getReg();
2027 // Fall through and let From be checked as a Variable below,
2028 // where it may or may not need a register.
2029 } else {
2030 return copyToReg(Flex, RegNum);
2031 }
2032 } else {
2033 return copyToReg(Flex, RegNum);
2034 }
2035 } else {
2036 return From;
2037 }
2038 }
2039
2040 if (llvm::isa<Constant>(From)) {
2041 if (llvm::isa<ConstantUndef>(From)) {
2042 // Lower undefs to zero. Another option is to lower undefs to an
2043 // uninitialized register; however, using an uninitialized register
2044 // results in less predictable code.
2045 if (isVectorType(From->getType()))
2046 return makeVectorOfZeros(From->getType(), RegNum);
2047 From = Ctx->getConstantZero(From->getType());
2048 }
2049 // There should be no constants of vector type (other than undef).
2050 assert(!isVectorType(From->getType()));
2051 bool CanBeFlex = Allowed & Legal_Flex;
2052 if (auto C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
2053 uint32_t RotateAmt;
2054 uint32_t Immed_8;
2055 uint32_t Value = static_cast<uint32_t>(C32->getValue());
2056 // Check if the immediate will fit in a Flexible second operand,
2057 // if a Flexible second operand is allowed. We need to know the exact
2058 // value, so that rules out relocatable constants.
2059 // Also try the inverse and use MVN if possible.
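      // An ARM flexible immediate is an 8-bit value rotated right by an even
      // amount, so e.g. 0xFF000000 encodes directly, while 0xFFFFFF00 does
      // not, but its inverse 0x000000FF does, allowing "mvn reg, #0xFF".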
2060 if (CanBeFlex &&
2061 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
2062 return OperandARM32FlexImm::create(Func, From->getType(), Immed_8,
2063 RotateAmt);
2064 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
2065 ~Value, &RotateAmt, &Immed_8)) {
2066 auto InvertedFlex = OperandARM32FlexImm::create(Func, From->getType(),
2067 Immed_8, RotateAmt);
2068 Type Ty = From->getType();
2069 Variable *Reg = makeReg(Ty, RegNum);
2070 _mvn(Reg, InvertedFlex);
2071 return Reg;
2072 } else {
2073 // Do a movw/movt to a register.
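        // For example, 0x12345678 is materialized as:
        //   movw reg, #0x5678
        //   movt reg, #0x1234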
2074 Type Ty = From->getType();
2075 Variable *Reg = makeReg(Ty, RegNum);
2076 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
2077 _movw(Reg,
2078 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
2079 if (UpperBits != 0) {
2080 _movt(Reg, Ctx->getConstantInt32(UpperBits));
2081 }
2082 return Reg;
2083 }
2084 } else if (auto C = llvm::dyn_cast<ConstantRelocatable>(From)) {
2085 Type Ty = From->getType();
2086 Variable *Reg = makeReg(Ty, RegNum);
2087 _movw(Reg, C);
2088 _movt(Reg, C);
2089 return Reg;
2090 } else {
2091 // Load floats/doubles from literal pool.
2092 UnimplementedError(Func->getContext()->getFlags());
2093 From = copyToReg(From, RegNum);
2094 }
2095 return From;
2096 }
2097
2098 if (auto Var = llvm::dyn_cast<Variable>(From)) {
2099 // Check if the variable is guaranteed a physical register. This
2100 // can happen either when the variable is pre-colored or when it is
2101 // assigned infinite weight.
2102 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
2103 // We need a new physical register for the operand if:
2104 // Mem is not allowed and Var isn't guaranteed a physical
2105 // register, or
2106 // RegNum is required and Var->getRegNum() doesn't match.
2107 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
2108 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
2109 From = copyToReg(From, RegNum);
2110 }
2111 return From;
2112 }
2113 llvm_unreachable("Unhandled operand kind in legalize()");
2114
2115 return From;
2116}
2117
2118// Provide a trivial wrapper to legalize() for this common usage.
2119Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
2120 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
2121}
2122
Jan Voungbefd03a2015-06-02 11:03:03 -07002123OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
2124 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
2125 // It may be the case that address mode optimization already creates
2126 // an OperandARM32Mem, so in that case it wouldn't need another level
2127 // of transformation.
2128 if (Mem) {
2129 return llvm::cast<OperandARM32Mem>(legalize(Mem));
2130 }
2131 // If we didn't do address mode optimization, then we only
2132 // have a base/offset to work with. ARM always requires a base
2133 // register, so just use that to hold the operand.
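  // The result is therefore a [base, #0] addressing form, with the original
  // operand legalized into the base register.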
2134 Variable *Base = legalizeToVar(Operand);
2135 return OperandARM32Mem::create(
2136 Func, Ty, Base,
2137 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
2138}
2139
Jan Voungb3401d22015-05-18 09:38:21 -07002140Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
2141 // There aren't any 64-bit integer registers for ARM32.
2142 assert(Type != IceType_i64);
2143 Variable *Reg = Func->makeVariable(Type);
2144 if (RegNum == Variable::NoRegister)
2145 Reg->setWeightInfinite();
2146 else
2147 Reg->setRegNum(RegNum);
2148 return Reg;
2149}
2150
Jan Voung55500db2015-05-26 14:25:40 -07002151void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
2152 assert(llvm::isPowerOf2_32(Align));
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002153 uint32_t RotateAmt;
Jan Voung55500db2015-05-26 14:25:40 -07002154 uint32_t Immed_8;
2155 Operand *Mask;
2156 // Use AND or BIC to mask off the bits, depending on which immediate fits
2157 // (if it fits at all). Assume Align is usually small, in which case BIC
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002158 // works better. Thus, this rounds down to the alignment.
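  // For example, Align == 16 gives "bic reg, reg, #15"; if Align - 1 cannot
  // be encoded as a flexible immediate, fall back to "and reg, reg, #-Align"
  // (with -Align possibly materialized via movw/movt by legalize()).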
Jan Voung55500db2015-05-26 14:25:40 -07002159 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
2160 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
2161 _bic(Reg, Reg, Mask);
2162 } else {
2163 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
2164 _and(Reg, Reg, Mask);
2165 }
2166}
2167
Jan Voungb36ad9b2015-04-21 17:01:49 -07002168void TargetARM32::postLower() {
2169 if (Ctx->getFlags().getOptLevel() == Opt_m1)
2170 return;
Jan Voungb3401d22015-05-18 09:38:21 -07002171 inferTwoAddress();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002172}
2173
2174void TargetARM32::makeRandomRegisterPermutation(
2175 llvm::SmallVectorImpl<int32_t> &Permutation,
2176 const llvm::SmallBitVector &ExcludeRegisters) const {
2177 (void)Permutation;
2178 (void)ExcludeRegisters;
Jan Voungb2d50842015-05-12 09:53:50 -07002179 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002180}
2181
void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
Jan Voungb36ad9b2015-04-21 17:01:49 -07002206
2207TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
2208 : TargetDataLowering(Ctx) {}
2209
John Porto8b1a7052015-06-17 13:20:08 -07002210void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
2211 const IceString &SectionSuffix) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002212 switch (Ctx->getFlags().getOutFileType()) {
2213 case FT_Elf: {
2214 ELFObjectWriter *Writer = Ctx->getObjectWriter();
John Porto8b1a7052015-06-17 13:20:08 -07002215 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002216 } break;
2217 case FT_Asm:
2218 case FT_Iasm: {
2219 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
2220 OstreamLocker L(Ctx);
John Porto8b1a7052015-06-17 13:20:08 -07002221 for (const VariableDeclaration *Var : Vars) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002222 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
John Porto8b1a7052015-06-17 13:20:08 -07002223 emitGlobal(*Var, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002224 }
2225 }
2226 } break;
2227 }
2228}
2229
John Porto0f86d032015-06-15 07:44:27 -07002230void TargetDataARM32::lowerConstants() {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002231 if (Ctx->getFlags().getDisableTranslation())
2232 return;
Jan Voungb2d50842015-05-12 09:53:50 -07002233 UnimplementedError(Ctx->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002234}
2235
Jan Voungfb792842015-06-11 15:27:50 -07002236TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
2237 : TargetHeaderLowering(Ctx) {}
2238
2239void TargetHeaderARM32::lower() {
2240 OstreamLocker L(Ctx);
2241 Ostream &Str = Ctx->getStrEmit();
2242 Str << ".syntax unified\n";
2243 // Emit build attributes in format: .eabi_attribute TAG, VALUE.
2244 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
2245 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
2246 //
  // Tag_conformance should be emitted first in a file-scope
2248 // sub-subsection of the first public subsection of the attributes.
2249 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least Cortex-A15, but target Cortex-A9 for wider
  // compatibility.
2251 Str << ".cpu cortex-a9\n"
2252 << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
2253 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
2254 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
2255 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
2256 // TODO(jvoung): check other CPU features like HW div.
2257 Str << ".fpu neon\n"
2258 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
2259 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
2260 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
2261 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
2262 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
2263 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
2264 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
2265 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
2266 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
2267 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
2268 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
2269 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2270 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2271 // However, for compatibility with current NaCl LLVM, don't claim that.
2272 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2273}
2274
Jan Voungb36ad9b2015-04-21 17:01:49 -07002275} // end of namespace Ice