//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction.  It also implements
// TargetX8632Fast::postLower() which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//
17
18#include "IceDefs.h"
19#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceInstX8632.h"
22#include "IceOperand.h"
23#include "IceTargetLoweringX8632.def"
24#include "IceTargetLoweringX8632.h"
25
26namespace Ice {
27
28namespace {
29
30// The following table summarizes the logic for lowering the fcmp instruction.
31// There is one table entry for each of the 16 conditions. A comment in
32// lowerFcmp() describes the lowering template. In the most general case, there
33// is a compare followed by two conditional branches, because some fcmp
34// conditions don't map to a single x86 conditional branch. However, in many
35// cases it is possible to swap the operands in the comparison and have a single
36// conditional branch. Since it's quite tedious to validate the table by hand,
37// good execution tests are helpful.
38
39const struct TableFcmp_ {
40 uint32_t Default;
41 bool SwapOperands;
42 InstX8632Br::BrCond C1, C2;
43} TableFcmp[] = {
44#define X(val, dflt, swap, C1, C2) \
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
46 ,
47 FCMPX8632_TABLE
48#undef X
49 };
50const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51
52// The following table summarizes the logic for lowering the icmp instruction
53// for i32 and narrower types. Each icmp condition has a clear mapping to an
54// x86 conditional branch instruction.
55
56const struct TableIcmp32_ {
57 InstX8632Br::BrCond Mapping;
58} TableIcmp32[] = {
59#define X(val, C_32, C1_64, C2_64, C3_64) \
60 { InstX8632Br::C_32 } \
61 ,
62 ICMPX8632_TABLE
63#undef X
64 };
65const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67// The following table summarizes the logic for lowering the icmp instruction
68// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69// conditional branches are needed. For the other conditions, three separate
70// conditional branches are needed.
71const struct TableIcmp64_ {
72 InstX8632Br::BrCond C1, C2, C3;
73} TableIcmp64[] = {
74#define X(val, C_32, C1_64, C2_64, C3_64) \
75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
76 ,
77 ICMPX8632_TABLE
78#undef X
79 };
80const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86}
87
88// In some cases, there are x-macros tables for both high-level and
89// low-level instructions/operands that use the same enum key value.
90// The tables are kept separate to maintain a proper separation
91// between abstraction layers. There is a risk that the tables
92// could get out of sync if enum values are reordered or if entries
93// are added or deleted. This dummy function uses static_assert to
94// ensure everything is kept in sync.
95void xMacroIntegrityCheck() {
96 // Validate the enum values in FCMPX8632_TABLE.
97 {
98 // Define a temporary set of enum values based on low-level
99 // table entries.
100 enum _tmp_enum {
101#define X(val, dflt, swap, C1, C2) _tmp_##val,
102 FCMPX8632_TABLE
103#undef X
104 };
105// Define a set of constants based on high-level table entries.
106#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
107 ICEINSTFCMP_TABLE;
108#undef X
109// Define a set of constants based on low-level table entries,
110// and ensure the table entry keys are consistent.
111#define X(val, dflt, swap, C1, C2) \
112 static const int _table2_##val = _tmp_##val; \
113 STATIC_ASSERT(_table1_##val == _table2_##val);
114 FCMPX8632_TABLE;
115#undef X
116// Repeat the static asserts with respect to the high-level
117// table entries in case the high-level table has extra entries.
118#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
119 ICEINSTFCMP_TABLE;
120#undef X
121 }
122
123 // Validate the enum values in ICMPX8632_TABLE.
124 {
125 // Define a temporary set of enum values based on low-level
126 // table entries.
127 enum _tmp_enum {
128#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
129 ICMPX8632_TABLE
130#undef X
131 };
132// Define a set of constants based on high-level table entries.
133#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
134 ICEINSTICMP_TABLE;
135#undef X
136// Define a set of constants based on low-level table entries,
137// and ensure the table entry keys are consistent.
138#define X(val, C_32, C1_64, C2_64, C3_64) \
139 static const int _table2_##val = _tmp_##val; \
140 STATIC_ASSERT(_table1_##val == _table2_##val);
141 ICMPX8632_TABLE;
142#undef X
143// Repeat the static asserts with respect to the high-level
144// table entries in case the high-level table has extra entries.
145#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
146 ICEINSTICMP_TABLE;
147#undef X
148 }
149
150 // Validate the enum values in ICETYPEX8632_TABLE.
151 {
152 // Define a temporary set of enum values based on low-level
153 // table entries.
154 enum _tmp_enum {
155#define X(tag, cvt, sdss, width) _tmp_##tag,
156 ICETYPEX8632_TABLE
157#undef X
158 };
159// Define a set of constants based on high-level table entries.
160#define X(tag, size, align, str) static const int _table1_##tag = tag;
161 ICETYPE_TABLE;
162#undef X
163// Define a set of constants based on low-level table entries,
164// and ensure the table entry keys are consistent.
165#define X(tag, cvt, sdss, width) \
166 static const int _table2_##tag = _tmp_##tag; \
167 STATIC_ASSERT(_table1_##tag == _table2_##tag);
168 ICETYPEX8632_TABLE;
169#undef X
170// Repeat the static asserts with respect to the high-level
171// table entries in case the high-level table has extra entries.
172#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
173 ICETYPE_TABLE;
174#undef X
175 }
176}
177
178} // end of anonymous namespace
179
180TargetX8632::TargetX8632(Cfg *Func)
181 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
182 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
183 PhysicalRegisters(VarList(Reg_NUM)) {
184 // TODO: Don't initialize IntegerRegisters and friends every time.
185 // Instead, initialize in some sort of static initializer for the
186 // class.
187 llvm::SmallBitVector IntegerRegisters(Reg_NUM);
188 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
189 llvm::SmallBitVector FloatRegisters(Reg_NUM);
190 llvm::SmallBitVector InvalidRegisters(Reg_NUM);
191 ScratchRegs.resize(Reg_NUM);
192#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
193 frameptr, isI8, isInt, isFP) \
194 IntegerRegisters[val] = isInt; \
195 IntegerRegistersI8[val] = isI8; \
196 FloatRegisters[val] = isFP; \
197 ScratchRegs[val] = scratch;
198 REGX8632_TABLE;
199#undef X
200 TypeToRegisterSet[IceType_void] = InvalidRegisters;
201 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
202 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
203 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
204 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
205 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
206 TypeToRegisterSet[IceType_f32] = FloatRegisters;
207 TypeToRegisterSet[IceType_f64] = FloatRegisters;
208}
209
210void TargetX8632::translateOm1() {
211 GlobalContext *Context = Func->getContext();
212 Ostream &Str = Context->getStrDump();
213 Timer T_placePhiLoads;
214 Func->placePhiLoads();
215 if (Func->hasError())
216 return;
217 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
218 Timer T_placePhiStores;
219 Func->placePhiStores();
220 if (Func->hasError())
221 return;
222 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
223 Timer T_deletePhis;
224 Func->deletePhis();
225 if (Func->hasError())
226 return;
227 T_deletePhis.printElapsedUs(Context, "deletePhis()");
228 if (Context->isVerbose()) {
229 Str << "================ After Phi lowering ================\n";
230 Func->dump();
231 }
232
233 Timer T_genCode;
234 Func->genCode();
235 if (Func->hasError())
236 return;
237 T_genCode.printElapsedUs(Context, "genCode()");
238 if (Context->isVerbose()) {
239 Str << "================ After initial x8632 codegen ================\n";
240 Func->dump();
241 }
242
243 Timer T_genFrame;
244 Func->genFrame();
245 if (Func->hasError())
246 return;
247 T_genFrame.printElapsedUs(Context, "genFrame()");
248 if (Context->isVerbose()) {
249 Str << "================ After stack frame mapping ================\n";
250 Func->dump();
251 }
252}
253
254IceString TargetX8632::RegNames[] = {
255#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
256 frameptr, isI8, isInt, isFP) \
257 name,
258 REGX8632_TABLE
259#undef X
260};
261
262Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
263 assert(RegNum < PhysicalRegisters.size());
264 Variable *Reg = PhysicalRegisters[RegNum];
265 if (Reg == NULL) {
266 CfgNode *Node = NULL; // NULL means multi-block lifetime
267 Reg = Func->makeVariable(IceType_i32, Node);
268 Reg->setRegNum(RegNum);
269 PhysicalRegisters[RegNum] = Reg;
270 }
271 return Reg;
272}
273
274IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
275 assert(RegNum < Reg_NUM);
276 static IceString RegNames8[] = {
277#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
278 frameptr, isI8, isInt, isFP) \
279 "" name8,
280 REGX8632_TABLE
281#undef X
282 };
283 static IceString RegNames16[] = {
284#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
285 frameptr, isI8, isInt, isFP) \
286 "" name16,
287 REGX8632_TABLE
288#undef X
289 };
290 switch (Ty) {
291 case IceType_i1:
292 case IceType_i8:
293 return RegNames8[RegNum];
294 case IceType_i16:
295 return RegNames16[RegNum];
296 default:
297 return RegNames[RegNum];
298 }
299}
300
301void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
302 Ostream &Str = Ctx->getStrEmit();
303 assert(Var->getLocalUseNode() == NULL ||
304 Var->getLocalUseNode() == Func->getCurrentNode());
305 if (Var->hasReg()) {
306 Str << getRegName(Var->getRegNum(), Var->getType());
307 return;
308 }
309 Str << InstX8632::getWidthString(Var->getType());
310 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
311 int32_t Offset = Var->getStackOffset() + getStackAdjustment();
312 if (Offset) {
313 if (Offset > 0)
314 Str << "+";
315 Str << Offset;
316 }
317 Str << "]";
318}
319
320// Helper function for addProlog(). Sets the frame offset for Arg,
321// updates InArgsSizeBytes according to Arg's width, and generates an
322// instruction to copy Arg into its assigned register if applicable.
323// For an I64 arg that has been split into Lo and Hi components, it
324// calls itself recursively on the components, taking care to handle
325// Lo first because of the little-endian architecture.
326void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
327 int32_t BasicFrameOffset,
328 int32_t &InArgsSizeBytes) {
329 Variable *Lo = Arg->getLo();
330 Variable *Hi = Arg->getHi();
331 Type Ty = Arg->getType();
332 if (Lo && Hi && Ty == IceType_i64) {
333 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
334 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
335 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
336 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
337 return;
338 }
339 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
340 if (Arg->hasReg()) {
341 assert(Ty != IceType_i64);
342 OperandX8632Mem *Mem = OperandX8632Mem::create(
343 Func, Ty, FramePtr,
344 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
345 _mov(Arg, Mem);
346 }
347 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
348}
349
350void TargetX8632::addProlog(CfgNode *Node) {
351 // If SimpleCoalescing is false, each variable without a register
352 // gets its own unique stack slot, which leads to large stack
353 // frames. If SimpleCoalescing is true, then each "global" variable
354 // without a register gets its own slot, but "local" variable slots
355 // are reused across basic blocks. E.g., if A and B are local to
356 // block 1 and C is local to block 2, then C may share a slot with A
357 // or B.
358 const bool SimpleCoalescing = true;
359 int32_t InArgsSizeBytes = 0;
360 int32_t RetIpSizeBytes = 4;
361 int32_t PreservedRegsSizeBytes = 0;
362 LocalsSizeBytes = 0;
363 Context.init(Node);
364 Context.setInsertPoint(Context.getCur());
365
366 // Determine stack frame offsets for each Variable without a
367 // register assignment. This can be done as one variable per stack
368 // slot. Or, do coalescing by running the register allocator again
369 // with an infinite set of registers (as a side effect, this gives
370 // variables a second chance at physical register assignment).
371 //
372 // A middle ground approach is to leverage sparsity and allocate one
373 // block of space on the frame for globals (variables with
374 // multi-block lifetime), and one block to share for locals
375 // (single-block lifetime).
376
377 llvm::SmallBitVector CalleeSaves =
378 getRegisterSet(RegSet_CalleeSave, RegSet_None);
379
380 int32_t GlobalsSize = 0;
381 std::vector<int> LocalsSize(Func->getNumNodes());
382
383 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
384 // LocalsSizeBytes.
385 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
386 const VarList &Variables = Func->getVariables();
387 const VarList &Args = Func->getArgs();
388 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
389 I != E; ++I) {
390 Variable *Var = *I;
391 if (Var->hasReg()) {
392 RegsUsed[Var->getRegNum()] = true;
393 continue;
394 }
395 // An argument passed on the stack already has a stack slot.
396 if (Var->getIsArg())
397 continue;
398 // A spill slot linked to a variable with a stack slot should reuse
399 // that stack slot.
400 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
401 if (Variable *Linked = Var->getPreferredRegister()) {
402 if (!Linked->hasReg())
403 continue;
404 }
405 }
406 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
407 if (SimpleCoalescing) {
408 if (Var->isMultiblockLife()) {
409 GlobalsSize += Increment;
410 } else {
411 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
412 LocalsSize[NodeIndex] += Increment;
413 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
414 LocalsSizeBytes = LocalsSize[NodeIndex];
415 }
416 } else {
417 LocalsSizeBytes += Increment;
418 }
419 }
420 LocalsSizeBytes += GlobalsSize;
421
422 // Add push instructions for preserved registers.
423 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
424 if (CalleeSaves[i] && RegsUsed[i]) {
425 PreservedRegsSizeBytes += 4;
426 const bool SuppressStackAdjustment = true;
427 _push(getPhysicalRegister(i), SuppressStackAdjustment);
428 }
429 }
430
431 // Generate "push ebp; mov ebp, esp"
432 if (IsEbpBasedFrame) {
433 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
434 .count() == 0);
435 PreservedRegsSizeBytes += 4;
436 Variable *ebp = getPhysicalRegister(Reg_ebp);
437 Variable *esp = getPhysicalRegister(Reg_esp);
438 const bool SuppressStackAdjustment = true;
439 _push(ebp, SuppressStackAdjustment);
440 _mov(ebp, esp);
441 }
442
443 // Generate "sub esp, LocalsSizeBytes"
444 if (LocalsSizeBytes)
445 _sub(getPhysicalRegister(Reg_esp),
446 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
447
448 resetStackAdjustment();
449
450 // Fill in stack offsets for args, and copy args into registers for
451 // those that were register-allocated. Args are pushed right to
452 // left, so Arg[0] is closest to the stack/frame pointer.
453 //
454 // TODO: Make this right for different width args, calling
455 // conventions, etc. For one thing, args passed in registers will
456 // need to be copied/shuffled to their home registers (the
457 // RegManager code may have some permutation logic to leverage),
458 // and if they have no home register, home space will need to be
459 // allocated on the stack to copy into.
460 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
461 int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
462 if (!IsEbpBasedFrame)
463 BasicFrameOffset += LocalsSizeBytes;
464 for (SizeT i = 0; i < Args.size(); ++i) {
465 Variable *Arg = Args[i];
466 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
467 }
468
469 // Fill in stack offsets for locals.
470 int32_t TotalGlobalsSize = GlobalsSize;
471 GlobalsSize = 0;
472 LocalsSize.assign(LocalsSize.size(), 0);
473 int32_t NextStackOffset = 0;
474 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
475 I != E; ++I) {
476 Variable *Var = *I;
477 if (Var->hasReg()) {
478 RegsUsed[Var->getRegNum()] = true;
479 continue;
480 }
481 if (Var->getIsArg())
482 continue;
483 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
484 if (Variable *Linked = Var->getPreferredRegister()) {
485 if (!Linked->hasReg()) {
486 // TODO: Make sure Linked has already been assigned a stack
487 // slot.
488 Var->setStackOffset(Linked->getStackOffset());
489 continue;
490 }
491 }
492 }
493 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
494 if (SimpleCoalescing) {
495 if (Var->isMultiblockLife()) {
496 GlobalsSize += Increment;
497 NextStackOffset = GlobalsSize;
498 } else {
499 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
500 LocalsSize[NodeIndex] += Increment;
501 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
502 }
503 } else {
504 NextStackOffset += Increment;
505 }
506 if (IsEbpBasedFrame)
507 Var->setStackOffset(-NextStackOffset);
508 else
509 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
510 }
511 this->FrameSizeLocals = NextStackOffset;
512 this->HasComputedFrame = true;
513
514 if (Func->getContext()->isVerbose(IceV_Frame)) {
515 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
516 << "\n"
517 << "InArgsSizeBytes=" << InArgsSizeBytes
518 << "\n"
519 << "PreservedRegsSizeBytes="
520 << PreservedRegsSizeBytes << "\n";
521 }
522}
523
524void TargetX8632::addEpilog(CfgNode *Node) {
525 InstList &Insts = Node->getInsts();
526 InstList::reverse_iterator RI, E;
527 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
528 if (llvm::isa<InstX8632Ret>(*RI))
529 break;
530 }
531 if (RI == E)
532 return;
533
534 // Convert the reverse_iterator position into its corresponding
535 // (forward) iterator position.
536 InstList::iterator InsertPoint = RI.base();
537 --InsertPoint;
538 Context.init(Node);
539 Context.setInsertPoint(InsertPoint);
540
541 Variable *esp = getPhysicalRegister(Reg_esp);
542 if (IsEbpBasedFrame) {
543 Variable *ebp = getPhysicalRegister(Reg_ebp);
544 _mov(esp, ebp);
545 _pop(ebp);
546 } else {
547 // add esp, LocalsSizeBytes
548 if (LocalsSizeBytes)
549 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
550 }
551
552 // Add pop instructions for preserved registers.
553 llvm::SmallBitVector CalleeSaves =
554 getRegisterSet(RegSet_CalleeSave, RegSet_None);
555 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
556 SizeT j = CalleeSaves.size() - i - 1;
557 if (j == Reg_ebp && IsEbpBasedFrame)
558 continue;
559 if (CalleeSaves[j] && RegsUsed[j]) {
560 _pop(getPhysicalRegister(j));
561 }
562 }
563}
564
Jim Stichnothf61d5b22014-05-23 13:31:24 -0700565template <typename T> struct PoolTypeConverter {};
566
567template <> struct PoolTypeConverter<float> {
568 typedef float PrimitiveFpType;
569 typedef uint32_t PrimitiveIntType;
570 typedef ConstantFloat IceType;
571 static const Type Ty = IceType_f32;
572 static const char *TypeName;
573 static const char *AsmTag;
574 static const char *PrintfString;
575};
576const char *PoolTypeConverter<float>::TypeName = "float";
577const char *PoolTypeConverter<float>::AsmTag = ".long";
578const char *PoolTypeConverter<float>::PrintfString = "0x%x";
579
580template <> struct PoolTypeConverter<double> {
581 typedef double PrimitiveFpType;
582 typedef uint64_t PrimitiveIntType;
583 typedef ConstantDouble IceType;
584 static const Type Ty = IceType_f64;
585 static const char *TypeName;
586 static const char *AsmTag;
587 static const char *PrintfString;
588};
589const char *PoolTypeConverter<double>::TypeName = "double";
590const char *PoolTypeConverter<double>::AsmTag = ".quad";
591const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
592
593template <typename T> void TargetX8632::emitConstantPool() const {
594 Ostream &Str = Ctx->getStrEmit();
595 Type Ty = T::Ty;
596 SizeT Align = typeAlignInBytes(Ty);
597 ConstantList Pool = Ctx->getConstantPool(Ty);
598
599 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
600 << "\n";
601 Str << "\t.align\t" << Align << "\n";
602 for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
603 ++I) {
604 typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
605 typename T::PrimitiveFpType Value = Const->getValue();
606 // Use memcpy() to copy bits from Value into RawValue in a way
607 // that avoids breaking strict-aliasing rules.
608 typename T::PrimitiveIntType RawValue;
609 memcpy(&RawValue, &Value, sizeof(Value));
610 char buf[30];
611 int CharsPrinted =
612 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
613 assert(CharsPrinted >= 0 &&
614 (size_t)CharsPrinted < llvm::array_lengthof(buf));
615 (void)CharsPrinted; // avoid warnings if asserts are disabled
616 Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
617 Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
618 << Value << "\n";
619 }
620}
621
622void TargetX8632::emitConstants() const {
623 emitConstantPool<PoolTypeConverter<float> >();
624 emitConstantPool<PoolTypeConverter<double> >();
625
626 // No need to emit constants from the int pool since (for x86) they
627 // are embedded as immediates in the instructions.
628}
629
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700630void TargetX8632::split64(Variable *Var) {
631 switch (Var->getType()) {
632 default:
633 return;
634 case IceType_i64:
635 // TODO: Only consider F64 if we need to push each half when
636 // passing as an argument to a function call. Note that each half
637 // is still typed as I32.
638 case IceType_f64:
639 break;
640 }
641 Variable *Lo = Var->getLo();
642 Variable *Hi = Var->getHi();
643 if (Lo) {
644 assert(Hi);
645 return;
646 }
647 assert(Hi == NULL);
648 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
649 Var->getName() + "__lo");
650 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
651 Var->getName() + "__hi");
652 Var->setLoHi(Lo, Hi);
653 if (Var->getIsArg()) {
654 Lo->setIsArg(Func);
655 Hi->setIsArg(Func);
656 }
657}
658
659Operand *TargetX8632::loOperand(Operand *Operand) {
660 assert(Operand->getType() == IceType_i64);
661 if (Operand->getType() != IceType_i64)
662 return Operand;
663 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
664 split64(Var);
665 return Var->getLo();
666 }
667 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
668 uint64_t Mask = (1ull << 32) - 1;
669 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
670 }
671 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
672 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
673 Mem->getOffset(), Mem->getIndex(),
674 Mem->getShift());
675 }
676 llvm_unreachable("Unsupported operand type");
677 return NULL;
678}
679
680Operand *TargetX8632::hiOperand(Operand *Operand) {
681 assert(Operand->getType() == IceType_i64);
682 if (Operand->getType() != IceType_i64)
683 return Operand;
684 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
685 split64(Var);
686 return Var->getHi();
687 }
688 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
689 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
690 }
691 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
692 Constant *Offset = Mem->getOffset();
693 if (Offset == NULL)
694 Offset = Ctx->getConstantInt(IceType_i32, 4);
695 else if (ConstantInteger *IntOffset =
696 llvm::dyn_cast<ConstantInteger>(Offset)) {
697 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
698 } else if (ConstantRelocatable *SymOffset =
699 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
700 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
701 SymOffset->getName());
702 }
703 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
704 Mem->getIndex(), Mem->getShift());
705 }
706 llvm_unreachable("Unsupported operand type");
707 return NULL;
708}
709
710llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
711 RegSetMask Exclude) const {
712 llvm::SmallBitVector Registers(Reg_NUM);
713
714#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
715 frameptr, isI8, isInt, isFP) \
716 if (scratch && (Include & RegSet_CallerSave)) \
717 Registers[val] = true; \
718 if (preserved && (Include & RegSet_CalleeSave)) \
719 Registers[val] = true; \
720 if (stackptr && (Include & RegSet_StackPointer)) \
721 Registers[val] = true; \
722 if (frameptr && (Include & RegSet_FramePointer)) \
723 Registers[val] = true; \
724 if (scratch && (Exclude & RegSet_CallerSave)) \
725 Registers[val] = false; \
726 if (preserved && (Exclude & RegSet_CalleeSave)) \
727 Registers[val] = false; \
728 if (stackptr && (Exclude & RegSet_StackPointer)) \
729 Registers[val] = false; \
730 if (frameptr && (Exclude & RegSet_FramePointer)) \
731 Registers[val] = false;
732
733 REGX8632_TABLE
734
735#undef X
736
737 return Registers;
738}
739
740void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
741 IsEbpBasedFrame = true;
742 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
743 // the number of adjustments of esp, etc.
744 Variable *esp = getPhysicalRegister(Reg_esp);
745 Operand *TotalSize = legalize(Inst->getSizeInBytes());
746 Variable *Dest = Inst->getDest();
747 _sub(esp, TotalSize);
748 _mov(Dest, esp);
749}
750
751void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
752 Variable *Dest = Inst->getDest();
753 Operand *Src0 = legalize(Inst->getSrc(0));
754 Operand *Src1 = legalize(Inst->getSrc(1));
755 if (Dest->getType() == IceType_i64) {
756 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
757 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
758 Operand *Src0Lo = loOperand(Src0);
759 Operand *Src0Hi = hiOperand(Src0);
760 Operand *Src1Lo = loOperand(Src1);
761 Operand *Src1Hi = hiOperand(Src1);
762 Variable *T_Lo = NULL, *T_Hi = NULL;
763 switch (Inst->getOp()) {
764 case InstArithmetic::Add:
765 _mov(T_Lo, Src0Lo);
766 _add(T_Lo, Src1Lo);
767 _mov(DestLo, T_Lo);
768 _mov(T_Hi, Src0Hi);
769 _adc(T_Hi, Src1Hi);
770 _mov(DestHi, T_Hi);
771 break;
772 case InstArithmetic::And:
773 _mov(T_Lo, Src0Lo);
774 _and(T_Lo, Src1Lo);
775 _mov(DestLo, T_Lo);
776 _mov(T_Hi, Src0Hi);
777 _and(T_Hi, Src1Hi);
778 _mov(DestHi, T_Hi);
779 break;
780 case InstArithmetic::Or:
781 _mov(T_Lo, Src0Lo);
782 _or(T_Lo, Src1Lo);
783 _mov(DestLo, T_Lo);
784 _mov(T_Hi, Src0Hi);
785 _or(T_Hi, Src1Hi);
786 _mov(DestHi, T_Hi);
787 break;
788 case InstArithmetic::Xor:
789 _mov(T_Lo, Src0Lo);
790 _xor(T_Lo, Src1Lo);
791 _mov(DestLo, T_Lo);
792 _mov(T_Hi, Src0Hi);
793 _xor(T_Hi, Src1Hi);
794 _mov(DestHi, T_Hi);
795 break;
796 case InstArithmetic::Sub:
797 _mov(T_Lo, Src0Lo);
798 _sub(T_Lo, Src1Lo);
799 _mov(DestLo, T_Lo);
800 _mov(T_Hi, Src0Hi);
801 _sbb(T_Hi, Src1Hi);
802 _mov(DestHi, T_Hi);
803 break;
804 case InstArithmetic::Mul: {
805 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
806 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
807 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
808 // gcc does the following:
809 // a=b*c ==>
810 // t1 = b.hi; t1 *=(imul) c.lo
811 // t2 = c.hi; t2 *=(imul) b.lo
812 // t3:eax = b.lo
813 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
814 // a.lo = t4.lo
815 // t4.hi += t1
816 // t4.hi += t2
817 // a.hi = t4.hi
818 _mov(T_1, Src0Hi);
819 _imul(T_1, Src1Lo);
820 _mov(T_2, Src1Hi);
821 _imul(T_2, Src0Lo);
822 _mov(T_3, Src0Lo, Reg_eax);
823 _mul(T_4Lo, T_3, Src1Lo);
824 // The mul instruction produces two dest variables, edx:eax. We
825 // create a fake definition of edx to account for this.
826 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
827 _mov(DestLo, T_4Lo);
828 _add(T_4Hi, T_1);
829 _add(T_4Hi, T_2);
830 _mov(DestHi, T_4Hi);
831 } break;
832 case InstArithmetic::Shl: {
833 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
834 // gcc does the following:
835 // a=b<<c ==>
836 // t1:ecx = c.lo & 0xff
837 // t2 = b.lo
838 // t3 = b.hi
839 // t3 = shld t3, t2, t1
840 // t2 = shl t2, t1
841 // test t1, 0x20
842 // je L1
843 // use(t3)
844 // t3 = t2
845 // t2 = 0
846 // L1:
847 // a.lo = t2
848 // a.hi = t3
849 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
850 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
851 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
852 InstX8632Label *Label = InstX8632Label::create(Func, this);
853 _mov(T_1, Src1Lo, Reg_ecx);
854 _mov(T_2, Src0Lo);
855 _mov(T_3, Src0Hi);
856 _shld(T_3, T_2, T_1);
857 _shl(T_2, T_1);
858 _test(T_1, BitTest);
859 _br(InstX8632Br::Br_e, Label);
860 // Because of the intra-block control flow, we need to fake a use
861 // of T_3 to prevent its earlier definition from being dead-code
862 // eliminated in the presence of its later definition.
863 Context.insert(InstFakeUse::create(Func, T_3));
864 _mov(T_3, T_2);
865 _mov(T_2, Zero);
866 Context.insert(Label);
867 _mov(DestLo, T_2);
868 _mov(DestHi, T_3);
869 } break;
870 case InstArithmetic::Lshr: {
871 // a=b>>c (unsigned) ==>
872 // t1:ecx = c.lo & 0xff
873 // t2 = b.lo
874 // t3 = b.hi
875 // t2 = shrd t2, t3, t1
876 // t3 = shr t3, t1
877 // test t1, 0x20
878 // je L1
879 // use(t2)
880 // t2 = t3
881 // t3 = 0
882 // L1:
883 // a.lo = t2
884 // a.hi = t3
885 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
886 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
887 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
888 InstX8632Label *Label = InstX8632Label::create(Func, this);
889 _mov(T_1, Src1Lo, Reg_ecx);
890 _mov(T_2, Src0Lo);
891 _mov(T_3, Src0Hi);
892 _shrd(T_2, T_3, T_1);
893 _shr(T_3, T_1);
894 _test(T_1, BitTest);
895 _br(InstX8632Br::Br_e, Label);
896 // Because of the intra-block control flow, we need to fake a use
897 // of T_3 to prevent its earlier definition from being dead-code
898 // eliminated in the presence of its later definition.
899 Context.insert(InstFakeUse::create(Func, T_2));
900 _mov(T_2, T_3);
901 _mov(T_3, Zero);
902 Context.insert(Label);
903 _mov(DestLo, T_2);
904 _mov(DestHi, T_3);
905 } break;
906 case InstArithmetic::Ashr: {
907 // a=b>>c (signed) ==>
908 // t1:ecx = c.lo & 0xff
909 // t2 = b.lo
910 // t3 = b.hi
911 // t2 = shrd t2, t3, t1
912 // t3 = sar t3, t1
913 // test t1, 0x20
914 // je L1
915 // use(t2)
916 // t2 = t3
917 // t3 = sar t3, 0x1f
918 // L1:
919 // a.lo = t2
920 // a.hi = t3
921 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
922 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
923 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
924 InstX8632Label *Label = InstX8632Label::create(Func, this);
925 _mov(T_1, Src1Lo, Reg_ecx);
926 _mov(T_2, Src0Lo);
927 _mov(T_3, Src0Hi);
928 _shrd(T_2, T_3, T_1);
929 _sar(T_3, T_1);
930 _test(T_1, BitTest);
931 _br(InstX8632Br::Br_e, Label);
932 // Because of the intra-block control flow, we need to fake a use
933 // of T_3 to prevent its earlier definition from being dead-code
934 // eliminated in the presence of its later definition.
935 Context.insert(InstFakeUse::create(Func, T_2));
936 _mov(T_2, T_3);
937 _sar(T_3, SignExtend);
938 Context.insert(Label);
939 _mov(DestLo, T_2);
940 _mov(DestHi, T_3);
941 } break;
942 case InstArithmetic::Udiv: {
943 const SizeT MaxSrcs = 2;
944 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
945 Call->addArg(Inst->getSrc(0));
946 Call->addArg(Inst->getSrc(1));
947 lowerCall(Call);
948 } break;
949 case InstArithmetic::Sdiv: {
950 const SizeT MaxSrcs = 2;
951 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
952 Call->addArg(Inst->getSrc(0));
953 Call->addArg(Inst->getSrc(1));
954 lowerCall(Call);
955 } break;
956 case InstArithmetic::Urem: {
957 const SizeT MaxSrcs = 2;
958 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
959 Call->addArg(Inst->getSrc(0));
960 Call->addArg(Inst->getSrc(1));
961 lowerCall(Call);
962 } break;
963 case InstArithmetic::Srem: {
964 const SizeT MaxSrcs = 2;
965 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
966 Call->addArg(Inst->getSrc(0));
967 Call->addArg(Inst->getSrc(1));
968 lowerCall(Call);
969 } break;
970 case InstArithmetic::Fadd:
971 case InstArithmetic::Fsub:
972 case InstArithmetic::Fmul:
973 case InstArithmetic::Fdiv:
974 case InstArithmetic::Frem:
975 llvm_unreachable("FP instruction with i64 type");
976 break;
977 }
978 } else { // Dest->getType() != IceType_i64
979 Variable *T_edx = NULL;
980 Variable *T = NULL;
981 switch (Inst->getOp()) {
982 case InstArithmetic::Add:
983 _mov(T, Src0);
984 _add(T, Src1);
985 _mov(Dest, T);
986 break;
987 case InstArithmetic::And:
988 _mov(T, Src0);
989 _and(T, Src1);
990 _mov(Dest, T);
991 break;
992 case InstArithmetic::Or:
993 _mov(T, Src0);
994 _or(T, Src1);
995 _mov(Dest, T);
996 break;
997 case InstArithmetic::Xor:
998 _mov(T, Src0);
999 _xor(T, Src1);
1000 _mov(Dest, T);
1001 break;
1002 case InstArithmetic::Sub:
1003 _mov(T, Src0);
1004 _sub(T, Src1);
1005 _mov(Dest, T);
1006 break;
1007 case InstArithmetic::Mul:
1008 // TODO: Optimize for llvm::isa<Constant>(Src1)
1009 // TODO: Strength-reduce multiplications by a constant,
1010 // particularly -1 and powers of 2. Advanced: use lea to
1011 // multiply by 3, 5, 9.
1012 //
1013 // The 8-bit version of imul only allows the form "imul r/m8"
1014 // where T must be in eax.
1015 if (Dest->getType() == IceType_i8)
1016 _mov(T, Src0, Reg_eax);
1017 else
1018 _mov(T, Src0);
1019 _imul(T, Src1);
1020 _mov(Dest, T);
1021 break;
1022 case InstArithmetic::Shl:
1023 _mov(T, Src0);
1024 if (!llvm::isa<Constant>(Src1))
1025 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1026 _shl(T, Src1);
1027 _mov(Dest, T);
1028 break;
1029 case InstArithmetic::Lshr:
1030 _mov(T, Src0);
1031 if (!llvm::isa<Constant>(Src1))
1032 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1033 _shr(T, Src1);
1034 _mov(Dest, T);
1035 break;
1036 case InstArithmetic::Ashr:
1037 _mov(T, Src0);
1038 if (!llvm::isa<Constant>(Src1))
1039 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1040 _sar(T, Src1);
1041 _mov(Dest, T);
1042 break;
1043 case InstArithmetic::Udiv:
1044 if (Dest->getType() == IceType_i8) {
1045 Variable *T_ah = NULL;
1046 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
1047 _mov(T, Src0, Reg_eax);
1048 _mov(T_ah, Zero, Reg_ah);
1049 _div(T, Src1, T_ah);
1050 _mov(Dest, T);
1051 } else {
1052 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1053 _mov(T, Src0, Reg_eax);
1054 _mov(T_edx, Zero, Reg_edx);
1055 _div(T, Src1, T_edx);
1056 _mov(Dest, T);
1057 }
1058 break;
1059 case InstArithmetic::Sdiv:
1060 T_edx = makeReg(IceType_i32, Reg_edx);
1061 _mov(T, Src0, Reg_eax);
1062 _cdq(T_edx, T);
1063 _idiv(T, Src1, T_edx);
1064 _mov(Dest, T);
1065 break;
1066 case InstArithmetic::Urem:
1067 if (Dest->getType() == IceType_i8) {
1068 Variable *T_ah = NULL;
1069 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
1070 _mov(T, Src0, Reg_eax);
1071 _mov(T_ah, Zero, Reg_ah);
1072 _div(T_ah, Src1, T);
1073 _mov(Dest, T_ah);
1074 } else {
1075 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1076 _mov(T_edx, Zero, Reg_edx);
1077 _mov(T, Src0, Reg_eax);
1078 _div(T_edx, Src1, T);
1079 _mov(Dest, T_edx);
1080 }
1081 break;
1082 case InstArithmetic::Srem:
1083 T_edx = makeReg(IceType_i32, Reg_edx);
1084 _mov(T, Src0, Reg_eax);
1085 _cdq(T_edx, T);
1086 _idiv(T_edx, Src1, T);
1087 _mov(Dest, T_edx);
1088 break;
1089 case InstArithmetic::Fadd:
1090 _mov(T, Src0);
1091 _addss(T, Src1);
1092 _mov(Dest, T);
1093 break;
1094 case InstArithmetic::Fsub:
1095 _mov(T, Src0);
1096 _subss(T, Src1);
1097 _mov(Dest, T);
1098 break;
1099 case InstArithmetic::Fmul:
1100 _mov(T, Src0);
1101 _mulss(T, Src1);
1102 _mov(Dest, T);
1103 break;
1104 case InstArithmetic::Fdiv:
1105 _mov(T, Src0);
1106 _divss(T, Src1);
1107 _mov(Dest, T);
1108 break;
1109 case InstArithmetic::Frem: {
1110 const SizeT MaxSrcs = 2;
1111 Type Ty = Dest->getType();
1112 InstCall *Call =
1113 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
1114 Call->addArg(Src0);
1115 Call->addArg(Src1);
1116 return lowerCall(Call);
1117 } break;
1118 }
1119 }
1120}
1121
1122void TargetX8632::lowerAssign(const InstAssign *Inst) {
1123 Variable *Dest = Inst->getDest();
1124 Operand *Src0 = Inst->getSrc(0);
1125 assert(Dest->getType() == Src0->getType());
1126 if (Dest->getType() == IceType_i64) {
1127 Src0 = legalize(Src0);
1128 Operand *Src0Lo = loOperand(Src0);
1129 Operand *Src0Hi = hiOperand(Src0);
1130 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1131 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1132 Variable *T_Lo = NULL, *T_Hi = NULL;
1133 _mov(T_Lo, Src0Lo);
1134 _mov(DestLo, T_Lo);
1135 _mov(T_Hi, Src0Hi);
1136 _mov(DestHi, T_Hi);
1137 } else {
1138 const bool AllowOverlap = true;
1139 // RI is either a physical register or an immediate.
1140 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
1141 _mov(Dest, RI);
1142 }
1143}
1144
1145void TargetX8632::lowerBr(const InstBr *Inst) {
1146 if (Inst->isUnconditional()) {
1147 _br(Inst->getTargetUnconditional());
1148 } else {
1149 Operand *Src0 = legalize(Inst->getCondition());
1150 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1151 _cmp(Src0, Zero);
1152 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1153 }
1154}
1155
// Lowers a call instruction: pushes arguments right to left, emits
// the call, adjusts esp afterwards, models the caller-save register
// clobbers, and copies the return value out of eax / edx:eax / st(0)
// as appropriate for the destination type.
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // 64-bit integers are pushed as two 32-bit halves, high half
      // first.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions. This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround. Force the operand into a (xmm) register, and
      // then push the register. An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Small integer results come back in eax.
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      // 64-bit results come back in the edx:eax pair.
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  // The call instruction itself only defines eax, so model the
  // definition of edx with a fake def.
  if (edx)
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment.
  if (Dest && eax) {
    if (edx) {
      // 64-bit destination: split Dest and copy the register pair
      // into its halves, hinting eax/edx as preferred registers.
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1267
// Lowers a cast instruction to the appropriate x86 conversion
// sequence, or to a runtime helper call for conversions that x86-32
// can't express directly (mostly 64-bit integer <-> floating point).
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand. If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // An i32 source needs no widening; use a plain mov instead of
      // movsx.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      // Arithmetic shift right by 31 replicates the sign bit across
      // the entire high word.
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      // An i32 source needs no widening; use a plain mov instead of
      // movzx.
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask to a single bit so only bit 0 of the i1's container
      // survives.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncating a 64-bit value just takes its low half.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers. SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word. This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64() is also reached when Dest is i32 --
      // confirm that split64 tolerates a non-i64 destination.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64. Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    // A same-type bitcast is just an assignment.
    if (Dest->getType() == Src0RM->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      // The round trip through the zero-weight spill variable moves
      // the bits between the integer and FP register files via
      // memory.
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      // The spill slot makes the f64 value addressable so its two
      // 32-bit halves can be read via VariableSplit operands.
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // Provide a fake definition of Spill before its halves are
      // written through the VariableSplit operands below.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1542
1543void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1544 Operand *Src0 = Inst->getSrc(0);
1545 Operand *Src1 = Inst->getSrc(1);
1546 Variable *Dest = Inst->getDest();
1547 // Lowering a = fcmp cond, b, c
1548 // ucomiss b, c /* only if C1 != Br_None */
1549 // /* but swap b,c order if SwapOperands==true */
1550 // mov a, <default>
1551 // j<C1> label /* only if C1 != Br_None */
1552 // j<C2> label /* only if C2 != Br_None */
1553 // FakeUse(a) /* only if C1 != Br_None */
1554 // mov a, !<default> /* only if C1 != Br_None */
1555 // label: /* only if C1 != Br_None */
1556 InstFcmp::FCond Condition = Inst->getCondition();
1557 size_t Index = static_cast<size_t>(Condition);
1558 assert(Index < TableFcmpSize);
1559 if (TableFcmp[Index].SwapOperands) {
1560 Operand *Tmp = Src0;
1561 Src0 = Src1;
1562 Src1 = Tmp;
1563 }
1564 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
1565 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
1566 if (HasC1) {
1567 Src0 = legalize(Src0);
1568 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
1569 Variable *T = NULL;
1570 _mov(T, Src0);
1571 _ucomiss(T, Src1RM);
1572 }
1573 Constant *Default =
1574 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
1575 _mov(Dest, Default);
1576 if (HasC1) {
1577 InstX8632Label *Label = InstX8632Label::create(Func, this);
1578 _br(TableFcmp[Index].C1, Label);
1579 if (HasC2) {
1580 _br(TableFcmp[Index].C2, Label);
1581 }
1582 Context.insert(InstFakeUse::create(Func, Dest));
1583 Constant *NonDefault =
1584 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
1585 _mov(Dest, NonDefault);
1586 Context.insert(Label);
1587 }
1588}
1589
// Lowers an integer compare whose boolean result is materialized
// into Dest, using intra-block branches with fake uses to protect
// the double assignment from dead-code elimination.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // 64-bit (in)equality: compare the halves separately, branching
      // to the label as soon as a half differs. The initial mov holds
      // the "halves differ" answer; the assignment after the fake use
      // overwrites it with the "halves equal" answer.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // Keep the first assignment to Dest from being dead-code
      // eliminated by the later assignment.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // 64-bit ordered comparison: try to decide from the high halves
      // (C1 => true, C2 => false); otherwise fall through to compare
      // the low halves (C3 => true). The conditions come from
      // TableIcmp64.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      // Keep the first assignment to Dest from being dead-code
      // eliminated by the later assignment.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
  // a physical register. (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  // Keep the first assignment to Dest from being dead-code
  // eliminated by the later assignment.
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1655
1656void TargetX8632::lowerLoad(const InstLoad *Inst) {
1657 // A Load instruction can be treated the same as an Assign
1658 // instruction, after the source operand is transformed into an
1659 // OperandX8632Mem operand. Note that the address mode
1660 // optimization already creates an OperandX8632Mem operand, so it
1661 // doesn't need another level of transformation.
1662 Type Ty = Inst->getDest()->getType();
1663 Operand *Src0 = Inst->getSourceAddress();
1664 // Address mode optimization already creates an OperandX8632Mem
1665 // operand, so it doesn't need another level of transformation.
1666 if (!llvm::isa<OperandX8632Mem>(Src0)) {
1667 Variable *Base = llvm::dyn_cast<Variable>(Src0);
1668 Constant *Offset = llvm::dyn_cast<Constant>(Src0);
1669 assert(Base || Offset);
1670 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
1671 }
1672
1673 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
1674 lowerAssign(Assign);
1675}
1676
// Phi instructions are not expected in the regular instruction list
// at lowering time; encountering one here is a translation error.
void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}
1680
// Lowers a return: places the return value (if any) in eax,
// edx:eax, or st(0) depending on its type, then emits ret.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      // 64-bit result goes in the edx:eax pair. eax becomes the ret
      // instruction's source; edx gets a fake use so it stays live
      // through the ret.
      Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (Src0->getType() == IceType_f32 ||
               Src0->getType() == IceType_f64) {
      // Floating-point results are returned on the x87 stack; load
      // the value into st(0).
      _fld(Src0);
    } else {
      // Integer results are returned in eax.
      _mov(Reg, Src0, Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
1706
// Lowers a select (a = d ? b : c) with intra-block control flow:
// assign the true operand unconditionally, then branch over a
// reassignment with the false operand when the condition holds.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    // The movs between the cmp and the br are register/immediate
    // moves, which do not modify the x86 flags.
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // The fake uses keep the earlier definitions of DestLo/DestHi
    // from being dead-code eliminated in the presence of the later
    // definitions.
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    // The fake use keeps the first assignment to Dest from being
    // dead-code eliminated by the second assignment.
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
1745
1746void TargetX8632::lowerStore(const InstStore *Inst) {
1747 Operand *Value = Inst->getData();
1748 Operand *Addr = Inst->getAddr();
1749 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
1750 // Address mode optimization already creates an OperandX8632Mem
1751 // operand, so it doesn't need another level of transformation.
1752 if (!NewAddr) {
1753 // The address will be either a constant (which represents a global
1754 // variable) or a variable, so either the Base or Offset component
1755 // of the OperandX8632Mem will be set.
1756 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1757 Constant *Offset = llvm::dyn_cast<Constant>(Addr);
1758 assert(Base || Offset);
1759 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
1760 }
1761 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
1762
1763 if (NewAddr->getType() == IceType_i64) {
1764 Value = legalize(Value);
1765 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
1766 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
1767 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
1768 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
1769 } else {
1770 Value = legalize(Value, Legal_Reg | Legal_Imm, true);
1771 _store(Value, NewAddr);
1772 }
1773}
1774
1775void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
1776 // This implements the most naive possible lowering.
1777 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
1778 Operand *Src0 = Inst->getComparison();
1779 SizeT NumCases = Inst->getNumCases();
1780 // OK, we'll be slightly less naive by forcing Src into a physical
1781 // register if there are 2 or more uses.
1782 if (NumCases >= 2)
1783 Src0 = legalizeToVar(Src0, true);
1784 else
1785 Src0 = legalize(Src0, Legal_All, true);
1786 for (SizeT I = 0; I < NumCases; ++I) {
1787 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
1788 _cmp(Src0, Value);
1789 _br(InstX8632Br::Br_e, Inst->getLabel(I));
1790 }
1791
1792 _br(Inst->getLabelDefault());
1793}
1794
1795void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
1796 const SizeT MaxSrcs = 0;
1797 Variable *Dest = NULL;
1798 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
1799 lowerCall(Call);
1800}
1801
// Turns From into an operand of one of the kinds permitted by
// Allowed (register, immediate, memory), emitting mov instructions
// as needed; if RegNum names a specific physical register, the
// result is forced into it. Returns the (possibly new) operand.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds. (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    // Rebuild the Mem operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    if (!(Allowed & Legal_Mem)) {
      // Memory operands are disallowed; load the value into a
      // register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (!(Allowed & Legal_Imm)) {
      // Immediates are disallowed; materialize the constant into a
      // register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        // No specific register was requested, so hint the register
        // allocator to assign Reg the same register as Var.
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
1866
1867// Provide a trivial wrapper to legalize() for this common usage.
1868Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
1869 int32_t RegNum) {
1870 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
1871}
1872
1873Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
1874 Variable *Reg = Func->makeVariable(Type, Context.getNode());
1875 if (RegNum == Variable::NoRegister)
1876 Reg->setWeightInfinite();
1877 else
1878 Reg->setRegNum(RegNum);
1879 return Reg;
1880}
1881
// postLower() implements the simplest possible register allocation for
// the "fast" (Opt_m1) target: a first pass removes pre-colored
// registers from the available pool, and a second pass greedily colors
// the remaining infinite-weight variables from that pool.
void TargetX8632::postLower() {
  // Only the minimal optimization level uses this allocator.
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers. TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    // NOTE(review): FakeKill instructions are skipped here — presumably
    // their sources don't represent real register uses; confirm against
    // InstFakeKill's semantics.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    SizeT VarIndex = 0;
    // Walk every Variable referenced by every source operand.
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        // Pre-colored register: remove it from the allocatable pool.
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    SizeT VarIndex = 0;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        Variable *Var = Src->getVar(J);
        // Only color variables that still need a register and were
        // marked as requiring one (infinite weight).
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        // Restrict candidates to registers valid for this type.
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        // Greedily take the first available register and mark it used.
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
1945
Jim Stichnothf61d5b22014-05-23 13:31:24 -07001946template <> void ConstantFloat::emit(const Cfg *Func) const {
1947 Ostream &Str = Func->getContext()->getStrEmit();
1948 // It would be better to prefix with ".L$" instead of "L$", but
1949 // llvm-mc doesn't parse "dword ptr [.L$foo]".
1950 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
1951}
1952
1953template <> void ConstantDouble::emit(const Cfg *Func) const {
1954 Ostream &Str = Func->getContext()->getStrEmit();
1955 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
1956}
1957
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001958} // end of namespace Ice