//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction. It also implements
// TargetX8632Fast::postLower() which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//
17
18#include "IceDefs.h"
19#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceInstX8632.h"
22#include "IceOperand.h"
23#include "IceTargetLoweringX8632.def"
24#include "IceTargetLoweringX8632.h"
25
26namespace Ice {
27
28namespace {
29
30// The following table summarizes the logic for lowering the fcmp instruction.
31// There is one table entry for each of the 16 conditions. A comment in
32// lowerFcmp() describes the lowering template. In the most general case, there
33// is a compare followed by two conditional branches, because some fcmp
34// conditions don't map to a single x86 conditional branch. However, in many
35// cases it is possible to swap the operands in the comparison and have a single
36// conditional branch. Since it's quite tedious to validate the table by hand,
37// good execution tests are helpful.
38
// One entry per fcmp condition (see the comment block above): the default
// result, whether to swap the compare operands, and the one or two x86
// branch conditions used by the lowering template described in lowerFcmp().
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapOperands;
  InstX8632Br::BrCond C1, C2;
} TableFcmp[] = {
#define X(val, dflt, swap, C1, C2)                                             \
  { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 }                             \
  ,
    FCMPX8632_TABLE
#undef X
  };
// Number of entries in TableFcmp, used for bounds assertions.
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51
52// The following table summarizes the logic for lowering the icmp instruction
53// for i32 and narrower types. Each icmp condition has a clear mapping to an
54// x86 conditional branch instruction.
55
// One entry per icmp condition: the single x86 branch condition used for
// i32-and-narrower compares (see the comment block above).
const struct TableIcmp32_ {
  InstX8632Br::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C_32 }                                                        \
  ,
    ICMPX8632_TABLE
#undef X
  };
// Number of entries in TableIcmp32, used for bounds assertions.
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67// The following table summarizes the logic for lowering the icmp instruction
68// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69// conditional branches are needed. For the other conditions, three separate
70// conditional branches are needed.
// One entry per icmp condition: up to three x86 branch conditions used
// when lowering a 64-bit compare (see the comment block above).
const struct TableIcmp64_ {
  InstX8632Br::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 }               \
  ,
    ICMPX8632_TABLE
#undef X
  };
// Number of entries in TableIcmp64, used for bounds assertions.
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86}
87
// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables
// could get out of sync if enum values are reordered or if entries
// are added or deleted. This dummy function uses static_assert to
// ensure everything is kept in sync.
void xMacroIntegrityCheck() {
  // Validate the enum values in FCMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, dflt, swap, C1, C2) _tmp_##val,
      FCMPX8632_TABLE
#undef X
      // Trailing sentinel enumerator; not compared against anything.
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
    ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, dflt, swap, C1, C2)                                             \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTFCMP_TABLE;
#undef X
  }

  // Validate the enum values in ICMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
      ICMPX8632_TABLE
#undef X
      // Trailing sentinel enumerator; not compared against anything.
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
    ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTICMP_TABLE;
#undef X
  }

  // Validate the enum values in ICETYPEX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(tag, cvt, sdss, width) _tmp_##tag,
      ICETYPEX8632_TABLE
#undef X
      // Trailing sentinel enumerator; not compared against anything.
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, str) static const int _table1_##tag = tag;
    ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(tag, cvt, sdss, width)                                               \
  static const int _table2_##tag = _tmp_##tag;                                 \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPE_TABLE;
#undef X
  }
}
180
181} // end of anonymous namespace
182
// Constructs the x86-32 lowering object for Func and builds the per-type
// register availability sets (TypeToRegisterSet) and scratch-register set
// from the isInt/isI8/isFP/scratch columns of REGX8632_TABLE.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(Reg_NUM)) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Reg_NUM);
  ScratchRegs.resize(Reg_NUM);
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[val] = isInt;                                               \
  IntegerRegistersI8[val] = isI8;                                              \
  FloatRegisters[val] = isFP;                                                  \
  ScratchRegs[val] = scratch;
  REGX8632_TABLE;
#undef X
  // i1 and i8 are restricted to the registers flagged isI8 in the table.
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
}
212
// Runs the -O2 translation pipeline on Func: Phi lowering, address mode
// optimization, target lowering (informed by liveness), linear-scan
// register allocation, and stack frame mapping. Each pass is timed, and
// translation stops early if any pass records an error on Func.
void TargetX8632::translateO2() {
  GlobalContext *Context = Func->getContext();

  // Lower Phi instructions.
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  Func->dump("After Phi lowering");

  // Address mode optimization.
  Timer T_doAddressOpt;
  Func->doAddressOpt();
  T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Timer T_renumber1;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber1.printElapsedUs(Context, "renumberInstructions()");
  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Timer T_liveness1;
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  T_liveness1.printElapsedUs(Context, "liveness()");
  Func->dump("After x86 address mode opt");
  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Timer T_renumber2;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber2.printElapsedUs(Context, "renumberInstructions()");
  Timer T_liveness2;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  T_liveness2.printElapsedUs(Context, "liveness()");
  // Validate the live range computations. Do it outside the timing
  // code. TODO: Put this under a flag.
  bool ValidLiveness = Func->validateLiveness();
  assert(ValidLiveness);
  (void)ValidLiveness; // used only in assert()
  ComputedLiveRanges = true;
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Timer T_regAlloc;
  regAlloc();
  if (Func->hasError())
    return;
  T_regAlloc.printElapsedUs(Context, "regAlloc()");
  Func->dump("After linear scan regalloc");

  // Stack frame mapping.
  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  Func->dump("After stack frame mapping");
}
301
302void TargetX8632::translateOm1() {
303 GlobalContext *Context = Func->getContext();
304 Timer T_placePhiLoads;
305 Func->placePhiLoads();
306 if (Func->hasError())
307 return;
308 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
309 Timer T_placePhiStores;
310 Func->placePhiStores();
311 if (Func->hasError())
312 return;
313 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
314 Timer T_deletePhis;
315 Func->deletePhis();
316 if (Func->hasError())
317 return;
318 T_deletePhis.printElapsedUs(Context, "deletePhis()");
319 Func->dump("After Phi lowering");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700320
321 Timer T_genCode;
322 Func->genCode();
323 if (Func->hasError())
324 return;
325 T_genCode.printElapsedUs(Context, "genCode()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700326 Func->dump("After initial x8632 codegen");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700327
328 Timer T_genFrame;
329 Func->genFrame();
330 if (Func->hasError())
331 return;
332 T_genFrame.printElapsedUs(Context, "genFrame()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700333 Func->dump("After stack frame mapping");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700334}
335
// Default (32-bit) register names, indexed by register number. The 8-bit
// and 16-bit name tables are local to getRegName().
IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};
343
344Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
345 assert(RegNum < PhysicalRegisters.size());
346 Variable *Reg = PhysicalRegisters[RegNum];
347 if (Reg == NULL) {
348 CfgNode *Node = NULL; // NULL means multi-block lifetime
349 Reg = Func->makeVariable(IceType_i32, Node);
350 Reg->setRegNum(RegNum);
351 PhysicalRegisters[RegNum] = Reg;
352 }
353 return Reg;
354}
355
// Returns the assembly name of register RegNum for type Ty, choosing the
// 8-bit name for i1/i8, the 16-bit name for i16, and the default (32-bit)
// name otherwise.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < Reg_NUM);
  static IceString RegNames8[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
    REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
    REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
382
383void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
384 Ostream &Str = Ctx->getStrEmit();
385 assert(Var->getLocalUseNode() == NULL ||
386 Var->getLocalUseNode() == Func->getCurrentNode());
387 if (Var->hasReg()) {
388 Str << getRegName(Var->getRegNum(), Var->getType());
389 return;
390 }
391 Str << InstX8632::getWidthString(Var->getType());
392 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
Jim Stichnothb0e142b2014-06-12 15:28:56 -0700393 int32_t Offset = Var->getStackOffset();
394 if (!hasFramePointer())
395 Offset += getStackAdjustment();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700396 if (Offset) {
397 if (Offset > 0)
398 Str << "+";
399 Str << Offset;
400 }
401 Str << "]";
402}
403
// Helper function for addProlog(). Sets the frame offset for Arg,
// updates InArgsSizeBytes according to Arg's width, and generates an
// instruction to copy Arg into its assigned register if applicable.
// For an I64 arg that has been split into Lo and Hi components, it
// calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture.
// BasicFrameOffset is the fixed distance from the frame/stack pointer to
// the first in-arg; InArgsSizeBytes accumulates the stack bytes consumed
// by the in-args processed so far and is updated in place.
void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
                                      size_t BasicFrameOffset,
                                      size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    // Load the register-allocated arg from its home stack slot.
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
    _mov(Arg, Mem);
  }
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
433
// Emits the function prolog into Node and computes the stack frame layout:
// pushes the callee-save registers that are actually used, optionally sets
// up an ebp-based frame, reserves LocalsSizeBytes of stack, and assigns
// stack offsets to incoming args and to variables without registers.
void TargetX8632::addProlog(CfgNode *Node) {
  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames. If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks. E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A
  // or B.
  const bool SimpleCoalescing = true;
  size_t InArgsSizeBytes = 0;
  size_t RetIpSizeBytes = 4; // size of the pushed return address
  size_t PreservedRegsSizeBytes = 0;
  LocalsSizeBytes = 0;
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  // Per-node tally of local (single-block) slot bytes; blocks share slots.
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
  // LocalsSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument passed on the stack already has a stack slot.
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg())
          continue;
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > LocalsSizeBytes)
          LocalsSizeBytes = LocalsSize[NodeIndex];
      }
    } else {
      LocalsSizeBytes += Increment;
    }
  }
  LocalsSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      PreservedRegsSizeBytes += 4;
      const bool SuppressStackAdjustment = true;
      _push(getPhysicalRegister(i), SuppressStackAdjustment);
    }
  }

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    Variable *esp = getPhysicalRegister(Reg_esp);
    const bool SuppressStackAdjustment = true;
    _push(ebp, SuppressStackAdjustment);
    _mov(ebp, esp);
  }

  // Generate "sub esp, LocalsSizeBytes"
  if (LocalsSizeBytes)
    _sub(getPhysicalRegister(Reg_esp),
         Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

  resetStackAdjustment();

  // Fill in stack offsets for args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  //
  // TODO: Make this right for different width args, calling
  // conventions, etc. For one thing, args passed in registers will
  // need to be copied/shuffled to their home registers (the
  // RegManager code may have some permutation logic to leverage),
  // and if they have no home register, home space will need to be
  // allocated on the stack to copy into.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += LocalsSizeBytes;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals. This pass mirrors the prepass
  // above, re-tallying the same sizes to assign concrete offsets.
  size_t TotalGlobalsSize = GlobalsSize;
  GlobalsSize = 0;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = 0;
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    if (Var->getIsArg())
      continue;
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg()) {
          // TODO: Make sure Linked has already been assigned a stack
          // slot.
          Var->setStackOffset(Linked->getStackOffset());
          continue;
        }
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
        NextStackOffset = GlobalsSize;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    // Offsets are negative from ebp, or positive from esp.
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset;
  this->HasComputedFrame = true;

  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
                                     << "\n"
                                     << "InArgsSizeBytes=" << InArgsSizeBytes
                                     << "\n"
                                     << "PreservedRegsSizeBytes="
                                     << PreservedRegsSizeBytes << "\n";
  }
}
612
// Emits the function epilog into Node, inserting just before its ret
// instruction: restores esp (through ebp for an ebp-based frame, otherwise
// by adding back LocalsSizeBytes) and pops preserved registers in reverse
// push order. Does nothing if Node contains no ret.
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Search backwards for the ret instruction.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    // mov esp, ebp; pop ebp
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, LocalsSizeBytes
    if (LocalsSizeBytes)
      _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of the
  // pushes in addProlog(). ebp was already popped above if applicable.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}
653
// Traits used by emitConstantPool(): for each primitive FP type, the
// matching unsigned integer type for raw bit access, the Ice constant
// class, the Ice type enum value, and the strings used when emitting
// (type name, assembler data directive, printf format for the raw bits).
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
681
// Emits the constant pool for one FP type, where T is a PoolTypeConverter
// specialization: a mergeable .rodata.cst section header followed by one
// labeled entry per pooled constant, written as raw hex bits with the FP
// value echoed in an assembler comment.
template <typename T> void TargetX8632::emitConstantPool() const {
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
       ++I) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
    typename T::PrimitiveFpType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    // Label format "L$<type>$<id>" identifies the pool entry.
    Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
    Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
710
711void TargetX8632::emitConstants() const {
712 emitConstantPool<PoolTypeConverter<float> >();
713 emitConstantPool<PoolTypeConverter<double> >();
714
715 // No need to emit constants from the int pool since (for x86) they
716 // are embedded as immediates in the instructions.
717}
718
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700719void TargetX8632::split64(Variable *Var) {
720 switch (Var->getType()) {
721 default:
722 return;
723 case IceType_i64:
724 // TODO: Only consider F64 if we need to push each half when
725 // passing as an argument to a function call. Note that each half
726 // is still typed as I32.
727 case IceType_f64:
728 break;
729 }
730 Variable *Lo = Var->getLo();
731 Variable *Hi = Var->getHi();
732 if (Lo) {
733 assert(Hi);
734 return;
735 }
736 assert(Hi == NULL);
737 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
738 Var->getName() + "__lo");
739 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
740 Var->getName() + "__hi");
741 Var->setLoHi(Lo, Hi);
742 if (Var->getIsArg()) {
743 Lo->setIsArg(Func);
744 Hi->setIsArg(Func);
745 }
746}
747
748Operand *TargetX8632::loOperand(Operand *Operand) {
749 assert(Operand->getType() == IceType_i64);
750 if (Operand->getType() != IceType_i64)
751 return Operand;
752 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
753 split64(Var);
754 return Var->getLo();
755 }
756 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
757 uint64_t Mask = (1ull << 32) - 1;
758 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
759 }
760 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
761 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
762 Mem->getOffset(), Mem->getIndex(),
763 Mem->getShift());
764 }
765 llvm_unreachable("Unsupported operand type");
766 return NULL;
767}
768
// Returns an i32 operand for the high-order 32 bits of the i64 Operand:
// the Hi half of a (split) Variable, the upper word of an integer
// constant, or a memory operand rebuilt with its offset advanced by 4.
Operand *TargetX8632::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    // Add 4 to the offset, preserving its constant vs. relocatable form;
    // a missing offset is treated as 0.
    Constant *Offset = Mem->getOffset();
    if (Offset == NULL)
      Offset = Ctx->getConstantInt(IceType_i32, 4);
    else if (ConstantInteger *IntOffset =
                 llvm::dyn_cast<ConstantInteger>(Offset)) {
      Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
    } else if (ConstantRelocatable *SymOffset =
                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
                                   SymOffset->getName());
    }
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
                                   Mem->getIndex(), Mem->getShift());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
798
// Builds a bit vector of physical registers: first sets registers whose
// REGX8632_TABLE attribute matches any Include category, then clears those
// matching any Exclude category (so Exclude wins on overlap).
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(Reg_NUM);

#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[val] = true;                                                     \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[val] = true;                                                     \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[val] = true;                                                     \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[val] = true;                                                     \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[val] = false;                                                    \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[val] = false;                                                    \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[val] = false;                                                    \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
828
829void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
830 IsEbpBasedFrame = true;
831 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
832 // the number of adjustments of esp, etc.
833 Variable *esp = getPhysicalRegister(Reg_esp);
834 Operand *TotalSize = legalize(Inst->getSizeInBytes());
835 Variable *Dest = Inst->getDest();
836 _sub(esp, TotalSize);
837 _mov(Dest, esp);
838}
839
// Lowers a single InstArithmetic.  64-bit integer operations are split
// into paired operations on the 32-bit lo/hi halves, or turned into
// runtime helper calls when no short inline expansion exists (divides
// and remainders).  8/16/32-bit integer operations map onto two-address
// x86 instructions, and f32/f64 operations use scalar SSE instructions,
// with Frem lowered to an fmodf/fmod libc call.
void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    // Split the destination and both sources into 32-bit halves.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = NULL, *T_Hi = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      // The halves are chained through the carry flag: add, then adc.
      _mov(T_Lo, Src0Lo);
      _add(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _adc(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      // Bitwise operations act on the two halves independently.
      _mov(T_Lo, Src0Lo);
      _and(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _and(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _mov(T_Lo, Src0Lo);
      _or(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _or(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _mov(T_Lo, Src0Lo);
      _xor(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _xor(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      // The halves are chained through the borrow flag: sub, then sbb.
      _mov(T_Lo, Src0Lo);
      _sub(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _sbb(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
      Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
      // gcc does the following:
      // a=b*c ==>
      //   t1 = b.hi; t1 *=(imul) c.lo
      //   t2 = c.hi; t2 *=(imul) b.lo
      //   t3:eax = b.lo
      //   t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
      //   a.lo = t4.lo
      //   t4.hi += t1
      //   t4.hi += t2
      //   a.hi = t4.hi
      _mov(T_1, Src0Hi);
      _imul(T_1, Src1Lo);
      _mov(T_2, Src1Hi);
      _imul(T_2, Src0Lo);
      _mov(T_3, Src0Lo, Reg_eax);
      _mul(T_4Lo, T_3, Src1Lo);
      // The mul instruction produces two dest variables, edx:eax.  We
      // create a fake definition of edx to account for this.
      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
      _mov(DestLo, T_4Lo);
      _add(T_4Hi, T_1);
      _add(T_4Hi, T_2);
      _mov(DestHi, T_4Hi);
    } break;
    case InstArithmetic::Shl: {
      // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
      // gcc does the following:
      // a=b<<c ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      // The shift count only uses the bottom 5 bits, so a count >= 32
      // (bit 0x20 set) means the lo half shifts entirely into the hi
      // half and the lo half becomes zero.
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_3));
      _mov(T_3, T_2);
      _mov(T_2, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _mov(T_3, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      // For a count >= 32, the hi half of the result is the sign bit
      // replicated (sar by 31).
      _sar(T_3, SignExtend);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    // 64-bit divides and remainders are lowered as calls to the usual
    // compiler runtime helpers.
    case InstArithmetic::Udiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else { // Dest->getType() != IceType_i64
    Variable *T_edx = NULL;
    Variable *T = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    // The common integer pattern is t=src0; t op= src1; dest=t, which
    // matches the two-address form of the x86 instructions.
    case InstArithmetic::Add:
      _mov(T, Src0);
      _add(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::And:
      _mov(T, Src0);
      _and(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Or:
      _mov(T, Src0);
      _or(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Xor:
      _mov(T, Src0);
      _xor(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Sub:
      _mov(T, Src0);
      _sub(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Mul:
      // TODO: Optimize for llvm::isa<Constant>(Src1)
      // TODO: Strength-reduce multiplications by a constant,
      // particularly -1 and powers of 2.  Advanced: use lea to
      // multiply by 3, 5, 9.
      //
      // The 8-bit version of imul only allows the form "imul r/m8"
      // where T must be in eax.
      if (Dest->getType() == IceType_i8)
        _mov(T, Src0, Reg_eax);
      else
        _mov(T, Src0);
      _imul(T, Src1);
      _mov(Dest, T);
      break;
    // Variable shift counts must live in cl, hence the forced ecx.
    case InstArithmetic::Shl:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shl(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shr(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _sar(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      // div and idiv are the few arithmetic operators that do not allow
      // immediates as the operand.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        // 8-bit div takes its dividend in ax (al=quotient, ah=remainder),
        // so zero ah and put the dividend in al.
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T, Src1, T_ah);
        _mov(Dest, T);
      } else {
        // 32-bit div takes its dividend in edx:eax; zero edx for an
        // unsigned divide.
        Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_edx, Zero, Reg_edx);
        _div(T, Src1, T_edx);
        _mov(Dest, T);
      }
      break;
    case InstArithmetic::Sdiv:
      // idiv does not allow an immediate operand either.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      // cdq sign-extends eax into edx to form the 64-bit dividend.
      _cdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
      break;
    case InstArithmetic::Urem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        // For an 8-bit remainder the result comes back in ah.
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T_ah, Src1, T);
        _mov(Dest, T_ah);
      } else {
        // For a 32-bit remainder the result comes back in edx.
        Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
        _mov(T_edx, Zero, Reg_edx);
        _mov(T, Src0, Reg_eax);
        _div(T_edx, Src1, T);
        _mov(Dest, T_edx);
      }
      break;
    case InstArithmetic::Srem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      // Signed remainder comes back in edx.
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    // Scalar SSE float ops follow the same t=src0; t op= src1 pattern.
    case InstArithmetic::Fadd:
      _mov(T, Src0);
      _addss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fsub:
      _mov(T, Src0);
      _subss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fmul:
      _mov(T, Src0);
      _mulss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fdiv:
      _mov(T, Src0);
      _divss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Frem: {
      // There is no SSE remainder instruction; call fmodf/fmod.
      const SizeT MaxSrcs = 2;
      Type Ty = Dest->getType();
      InstCall *Call =
          makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      return lowerCall(Call);
    } break;
    }
  }
}
1222
1223void TargetX8632::lowerAssign(const InstAssign *Inst) {
1224 Variable *Dest = Inst->getDest();
1225 Operand *Src0 = Inst->getSrc(0);
1226 assert(Dest->getType() == Src0->getType());
1227 if (Dest->getType() == IceType_i64) {
1228 Src0 = legalize(Src0);
1229 Operand *Src0Lo = loOperand(Src0);
1230 Operand *Src0Hi = hiOperand(Src0);
1231 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1232 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1233 Variable *T_Lo = NULL, *T_Hi = NULL;
1234 _mov(T_Lo, Src0Lo);
1235 _mov(DestLo, T_Lo);
1236 _mov(T_Hi, Src0Hi);
1237 _mov(DestHi, T_Hi);
1238 } else {
1239 const bool AllowOverlap = true;
1240 // RI is either a physical register or an immediate.
1241 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
1242 _mov(Dest, RI);
1243 }
1244}
1245
1246void TargetX8632::lowerBr(const InstBr *Inst) {
1247 if (Inst->isUnconditional()) {
1248 _br(Inst->getTargetUnconditional());
1249 } else {
1250 Operand *Src0 = legalize(Inst->getCondition());
1251 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1252 _cmp(Src0, Zero);
1253 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1254 }
1255}
1256
// Lowers a call: pushes arguments right to left, emits the call, pops
// the argument area, models register clobbers for the register
// allocator, and copies the return value out of eax/edx:eax/st(0) as
// appropriate.
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // Push the high half first so the low half ends up at the lower
      // address, matching the in-memory layout of an i64.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions.  This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround.  Force the operand into a (xmm) register, and
      // then push the register.  An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction.  Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    // Select the return-value register(s) implied by the dest type.
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  // The edx half of an i64 result is produced implicitly by the call,
  // so model it with a fake definition for liveness purposes.
  if (edx)
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp to pop the pushed arguments.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction so the register allocator
  // knows all caller-save (scratch) registers are clobbered by the call.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment (and Dest.hi=edx for i64 results).
  if (Dest && eax) {
    if (edx) {
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      // Hint the register allocator toward eax/edx to make the copies
      // removable.
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1368
// Lowers a cast instruction.  Most casts expand to movsx/movzx/cvt plus
// moves; 64-bit conversions with no direct SSE2 support are lowered as
// runtime helper calls, and same-width integer<->FP bitcasts go through
// a stack spill slot.
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand.  If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // A 32-bit source needs a plain mov rather than movsx.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      // The high half is the low half's sign bit replicated (sar 31).
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      // A 32-bit source needs a plain mov rather than movzx.
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask to a single bit so only the boolean value survives.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncation from i64 just takes the low half; narrower truncations
    // are a plain register copy at the destination width.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64 is also reached when Dest is i32 —
      // presumably split64 ignores non-i64 variables; confirm.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    // A same-type bitcast degenerates to a plain assignment.
    if (Dest->getType() == Src0RM->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      // The zero-weight Spill variable is forced onto the stack so the
      // value can be reloaded with the other register class.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      // VariableSplit views address the 32-bit halves of the f64 spill
      // slot.
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // Spill is only ever written through the two VariableSplit stores
      // below, so give it an explicit fake definition.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1643
// Lowers an fcmp by consulting TableFcmp (defined at the top of this
// file) for the condition's operand-swap flag, default result, and up
// to two conditional-branch conditions.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();
  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  // C1 == Br_None means the result is a constant (True/False condition)
  // and no compare is needed at all.
  bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
  if (HasC1) {
    // Force the first ucomiss operand into a physical register.
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // The FakeUse keeps the preceding mov of the default value from
    // being dead-code eliminated despite the later redefinition.
    Context.insert(InstFakeUse::create(Func, Dest));
    Constant *NonDefault =
        Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
    _mov(Dest, NonDefault);
    Context.insert(Label);
  }
}
1690
// Lowers an icmp into a cmp followed by branches that select a 0/1
// result, fusing the compare with an immediately following conditional
// branch when possible.  i64 compares are expanded into lo/hi half
// comparisons driven by TableIcmp64.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch.  This
  // is possible when the compare dest and the branch source operands are the
  // same, and are their only uses.  TODO: implement this optimization for i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      Operand *Src0New =
          legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
      _cmp(Src0New, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      NextBr->setDeleted();
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // (In)equality: compare the halves in sequence.  Any half-mismatch
      // jumps to Label with Dest still holding the "halves differ"
      // answer; falling all the way through rewrites Dest to the
      // "halves equal" answer.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // The FakeUse protects the initial mov from dead-code elimination
      // across the intra-block branches.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // Ordering comparisons: the hi halves decide (conditions C1/C2
      // from TableIcmp64) unless they are equal, in which case the lo
      // halves decide via condition C3.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      // FakeUse keeps the initial "Dest = 1" definition alive.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  // FakeUse keeps the "Dest = 1" definition alive across the branch.
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1774
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001775namespace {
1776
1777bool isAdd(const Inst *Inst) {
1778 if (const InstArithmetic *Arith =
1779 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
1780 return (Arith->getOp() == InstArithmetic::Add);
1781 }
1782 return false;
1783}
1784
// Iteratively strengthens an x86 addressing mode (Base, Index, Shift,
// Offset) by pattern-matching the SSA definitions of Base and Index:
// assignment chains are followed, Base=Var1+Var2 is split into
// Base/Index, and Index=Var*{1,2,4,8} is folded into the Shift field.
// Offset folding is not yet implemented (see the TODO comments below).
// All four parameters are in/out; the caller detects success by
// checking whether Base changed.
void computeAddressOpt(Variable *&Base, Variable *&Index, int32_t &Shift,
                       int32_t &Offset) {
  (void)Offset; // TODO: pattern-match for non-zero offsets.
  if (Base == NULL)
    return;
  // If the Base has more than one use or is live across multiple
  // blocks, then don't go further.  Alternatively (?), never consider
  // a transformation that would change a variable that is currently
  // *not* live across basic block boundaries into one that *is*.
  if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
    return;

  // Each iteration applies at most one strengthening rule and then
  // restarts; the loop exits when no rule matches.
  while (true) {
    // Base is Base=Var ==>
    //   set Base=Var
    const Inst *BaseInst = Base->getDefinition();
    Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL;
    Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
    // TODO: Helper function for all instances of assignment
    // transitivity.
    if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
        // TODO: ensure BaseVariable0 stays single-BB
        true) {
      Base = BaseVariable0;
      continue;
    }

    // Index is Index=Var ==>
    //   set Index=Var

    // Index==NULL && Base is Base=Var1+Var2 ==>
    //   set Base=Var1, Index=Var2, Shift=0
    Operand *BaseOperand1 =
        BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
    Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
    if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
        // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
        true) {
      Base = BaseVariable0;
      Index = BaseVariable1;
      Shift = 0; // should already have been 0
      continue;
    }

    // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)
    const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
    if (const InstArithmetic *ArithInst =
            llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
      Operand *IndexOperand0 = ArithInst->getSrc(0);
      Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
      Operand *IndexOperand1 = ArithInst->getSrc(1);
      ConstantInteger *IndexConstant1 =
          llvm::dyn_cast<ConstantInteger>(IndexOperand1);
      if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
          IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
        uint64_t Mult = IndexConstant1->getValue();
        uint32_t LogMult;
        // Only multipliers 1/2/4/8 are encodable in an x86 scale
        // field; any other value maps to LogMult=4, which fails the
        // Shift+LogMult<=3 test below.
        switch (Mult) {
        case 1:
          LogMult = 0;
          break;
        case 2:
          LogMult = 1;
          break;
        case 4:
          LogMult = 2;
          break;
        case 8:
          LogMult = 3;
          break;
        default:
          LogMult = 4;
          break;
        }
        if (Shift + LogMult <= 3) {
          Index = IndexVariable0;
          Shift += LogMult;
          continue;
        }
      }
    }

    // Index is Index=Var<<Const && Const+Shift<=3 ==>
    //   Index=Var, Shift+=Const

    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)

    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
    //   swap(Index,Base)
    // Similar for Base=Const*Var and Base=Var<<Const

    // Base is Base=Var+Const ==>
    //   set Base=Var, Offset+=Const

    // Base is Base=Const+Var ==>
    //   set Base=Var, Offset+=Const

    // Base is Base=Var-Const ==>
    //   set Base=Var, Offset-=Const

    // Index is Index=Var+Const ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Const+Var ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Var-Const ==>
    //   set Index=Var, Offset-=(Const<<Shift)

    // TODO: consider overflow issues with respect to Offset.
    // TODO: handle symbolic constants.
    break;
  }
}
1901
1902} // anonymous namespace
1903
// Lowers a load by turning it into an Assign whose source is an
// OperandX8632Mem, after first trying to fuse the load into an
// immediately-following Arithmetic instruction that consumes the
// loaded value.
void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand.  Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = Inst->getSourceAddress();
  // Address mode optimization already creates an OperandX8632Mem
  // operand, so it doesn't need another level of transformation.
  if (!llvm::isa<OperandX8632Mem>(Src0)) {
    // The raw address is either a Variable or a Constant (a global
    // symbol), so exactly one of Base/Offset is non-NULL.
    Variable *Base = llvm::dyn_cast<Variable>(Src0);
    Constant *Offset = llvm::dyn_cast<Constant>(Src0);
    assert(Base || Offset);
    Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
  }

  // Fuse this load with a subsequent Arithmetic instruction in the
  // following situations:
  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
  //
  // TODO: Clean up and test thoroughly.
  //
  // TODO: Why limit to Arithmetic instructions?  This could probably be
  // applied to most any instruction type.  Look at all source operands
  // in the following instruction, and if there is one instance of the
  // load instruction's dest variable, and that instruction ends that
  // variable's live range, then make the substitution.  Deal with
  // commutativity optimization in the arithmetic instruction lowering.
  InstArithmetic *NewArith = NULL;
  if (InstArithmetic *Arith =
          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
    Variable *DestLoad = Inst->getDest();
    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
        DestLoad != Src0Arith) {
      // Loaded value is the second operand: c=b+a ==> c=b+[mem].
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(0), Src0);
    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
      // Loaded value is the first operand; the rewrite swaps the
      // operands, so the operation must be commutative.
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(1), Src0);
    }
    if (NewArith) {
      // The original arithmetic instruction is replaced by the fused
      // one; delete it and skip past it in the lowering context.
      Arith->setDeleted();
      Context.advanceNext();
      lowerArithmetic(NewArith);
      return;
    }
  }

  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}
1960
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001961void TargetX8632::doAddressOptLoad() {
1962 Inst *Inst = *Context.getCur();
1963 Variable *Dest = Inst->getDest();
1964 Operand *Addr = Inst->getSrc(0);
1965 Variable *Index = NULL;
1966 int32_t Shift = 0;
1967 int32_t Offset = 0; // TODO: make Constant
1968 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1969 computeAddressOpt(Base, Index, Shift, Offset);
1970 if (Base && Addr != Base) {
1971 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
1972 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
1973 Shift);
1974 Inst->setDeleted();
1975 Context.insert(InstLoad::create(Func, Dest, Addr));
1976 }
1977}
1978
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001979void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
1980 Func->setError("Phi found in regular instruction list");
1981}
1982
// Lowers a return: moves the return value (if any) into its
// ABI-mandated location -- edx:eax for i64, the x87 stack top for
// f32/f64, eax for other integer types -- and emits the ret.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      // 64-bit values are returned in the edx:eax register pair.
      Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
      Reg = eax;
      // Fake use keeps edx live through the ret.
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (Src0->getType() == IceType_f32 ||
               Src0->getType() == IceType_f64) {
      // Floating-point results are returned on the x87 stack via fld.
      _fld(Src0);
    } else {
      // Remaining (integer) types are returned in eax; _mov assigns
      // Reg as a side effect.
      _mov(Reg, Src0, Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function.  Otherwise post-call esp adjustments get dead-code
  // eliminated.  TODO: Are there more places where the fake use
  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
2008
// Lowers a select using a compare-and-conditional-branch pattern:
//   a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
// The FakeUse prevents the first assignment to the dest from being
// dead-code eliminated, since only one of the two moves executes.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // 64-bit select: apply the same pattern to the lo and hi halves.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // Fake uses keep the true-operand moves alive across the branch.
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
2047
// Lowers a store, splitting a 64-bit value into two 32-bit stores of
// the hi and lo halves.
void TargetX8632::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
  // Address mode optimization already creates an OperandX8632Mem
  // operand, so it doesn't need another level of transformation.
  if (!NewAddr) {
    // The address will be either a constant (which represents a global
    // variable) or a variable, so either the Base or Offset component
    // of the OperandX8632Mem will be set.
    Variable *Base = llvm::dyn_cast<Variable>(Addr);
    Constant *Offset = llvm::dyn_cast<Constant>(Addr);
    assert(Base || Offset);
    NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
  }
  // legalize() ensures the address's Base/Index components are in
  // physical registers.
  NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));

  if (NewAddr->getType() == IceType_i64) {
    // 64-bit store: emit separate stores for the hi and lo halves.
    Value = legalize(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
    _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
    _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm, true);
    _store(Value, NewAddr);
  }
}
2076
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002077void TargetX8632::doAddressOptStore() {
2078 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
2079 Operand *Data = Inst->getData();
2080 Operand *Addr = Inst->getAddr();
2081 Variable *Index = NULL;
2082 int32_t Shift = 0;
2083 int32_t Offset = 0; // TODO: make Constant
2084 Variable *Base = llvm::dyn_cast<Variable>(Addr);
2085 computeAddressOpt(Base, Index, Shift, Offset);
2086 if (Base && Addr != Base) {
2087 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
2088 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
2089 Shift);
2090 Inst->setDeleted();
2091 Context.insert(InstStore::create(Func, Data, Addr));
2092 }
2093}
2094
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002095void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
2096 // This implements the most naive possible lowering.
2097 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
2098 Operand *Src0 = Inst->getComparison();
2099 SizeT NumCases = Inst->getNumCases();
2100 // OK, we'll be slightly less naive by forcing Src into a physical
2101 // register if there are 2 or more uses.
2102 if (NumCases >= 2)
2103 Src0 = legalizeToVar(Src0, true);
2104 else
2105 Src0 = legalize(Src0, Legal_All, true);
2106 for (SizeT I = 0; I < NumCases; ++I) {
2107 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
2108 _cmp(Src0, Value);
2109 _br(InstX8632Br::Br_e, Inst->getLabel(I));
2110 }
2111
2112 _br(Inst->getLabelDefault());
2113}
2114
2115void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
2116 const SizeT MaxSrcs = 0;
2117 Variable *Dest = NULL;
2118 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
2119 lowerCall(Call);
2120}
2121
// Legalizes From into an operand kind permitted by the Allowed mask
// (register / immediate / memory), emitting mov instructions as
// needed.  If RegNum is specified, the result is forced into that
// exact physical register.  AllowOverlap is forwarded to
// setPreferredRegister when a fresh register is created for a
// Variable, allowing the new register's live range to overlap the
// source's.  Returns the legalized operand (possibly From unchanged).
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register.  If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds.  (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    // Rebuild the Mem operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    if (!(Allowed & Legal_Mem)) {
      // A memory operand is not allowed here; load it into a register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (!(Allowed & Legal_Imm)) {
      // An immediate is not allowed here; materialize it in a register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
2186
2187// Provide a trivial wrapper to legalize() for this common usage.
2188Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
2189 int32_t RegNum) {
2190 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
2191}
2192
2193Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
2194 Variable *Reg = Func->makeVariable(Type, Context.getNode());
2195 if (RegNum == Variable::NoRegister)
2196 Reg->setWeightInfinite();
2197 else
2198 Reg->setRegNum(RegNum);
2199 return Reg;
2200}
2201
// Performs the simplest possible register allocation for the -Om1
// ("fast") configuration, in two passes over the current context's
// instructions: the first pass black-lists registers already
// pre-colored by lowering; the second greedily assigns the remaining
// infinite-weight variables to still-available registers.
void TargetX8632::postLower() {
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers.  TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    // FakeKill instructions are skipped in this pass; presumably
    // their register operands shouldn't poison the white list --
    // NOTE(review): confirm against InstFakeKill semantics.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        Variable *Var = Src->getVar(J);
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        // Restrict candidates to registers valid for this type.
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
2263
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002264template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
2265 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothf61d5b22014-05-23 13:31:24 -07002266 // It would be better to prefix with ".L$" instead of "L$", but
2267 // llvm-mc doesn't parse "dword ptr [.L$foo]".
2268 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
2269}
2270
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002271template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
2272 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothf61d5b22014-05-23 13:31:24 -07002273 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
2274}
2275
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002276} // end of namespace Ice