//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction.  It also implements
// TargetX8632Fast::postLower() which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//
17
18#include "IceDefs.h"
19#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceInstX8632.h"
22#include "IceOperand.h"
23#include "IceTargetLoweringX8632.def"
24#include "IceTargetLoweringX8632.h"
25
26namespace Ice {
27
28namespace {
29
30// The following table summarizes the logic for lowering the fcmp instruction.
31// There is one table entry for each of the 16 conditions. A comment in
32// lowerFcmp() describes the lowering template. In the most general case, there
33// is a compare followed by two conditional branches, because some fcmp
34// conditions don't map to a single x86 conditional branch. However, in many
35// cases it is possible to swap the operands in the comparison and have a single
36// conditional branch. Since it's quite tedious to validate the table by hand,
37// good execution tests are helpful.
38
39const struct TableFcmp_ {
40 uint32_t Default;
41 bool SwapOperands;
42 InstX8632Br::BrCond C1, C2;
43} TableFcmp[] = {
44#define X(val, dflt, swap, C1, C2) \
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
46 ,
47 FCMPX8632_TABLE
48#undef X
49 };
50const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51
52// The following table summarizes the logic for lowering the icmp instruction
53// for i32 and narrower types. Each icmp condition has a clear mapping to an
54// x86 conditional branch instruction.
55
56const struct TableIcmp32_ {
57 InstX8632Br::BrCond Mapping;
58} TableIcmp32[] = {
59#define X(val, C_32, C1_64, C2_64, C3_64) \
60 { InstX8632Br::C_32 } \
61 ,
62 ICMPX8632_TABLE
63#undef X
64 };
65const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67// The following table summarizes the logic for lowering the icmp instruction
68// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69// conditional branches are needed. For the other conditions, three separate
70// conditional branches are needed.
71const struct TableIcmp64_ {
72 InstX8632Br::BrCond C1, C2, C3;
73} TableIcmp64[] = {
74#define X(val, C_32, C1_64, C2_64, C3_64) \
75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
76 ,
77 ICMPX8632_TABLE
78#undef X
79 };
80const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86}
87
88// In some cases, there are x-macros tables for both high-level and
89// low-level instructions/operands that use the same enum key value.
90// The tables are kept separate to maintain a proper separation
91// between abstraction layers. There is a risk that the tables
92// could get out of sync if enum values are reordered or if entries
93// are added or deleted. This dummy function uses static_assert to
94// ensure everything is kept in sync.
95void xMacroIntegrityCheck() {
96 // Validate the enum values in FCMPX8632_TABLE.
97 {
98 // Define a temporary set of enum values based on low-level
99 // table entries.
100 enum _tmp_enum {
101#define X(val, dflt, swap, C1, C2) _tmp_##val,
102 FCMPX8632_TABLE
103#undef X
104 };
105// Define a set of constants based on high-level table entries.
106#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
107 ICEINSTFCMP_TABLE;
108#undef X
109// Define a set of constants based on low-level table entries,
110// and ensure the table entry keys are consistent.
111#define X(val, dflt, swap, C1, C2) \
112 static const int _table2_##val = _tmp_##val; \
113 STATIC_ASSERT(_table1_##val == _table2_##val);
114 FCMPX8632_TABLE;
115#undef X
116// Repeat the static asserts with respect to the high-level
117// table entries in case the high-level table has extra entries.
118#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
119 ICEINSTFCMP_TABLE;
120#undef X
121 }
122
123 // Validate the enum values in ICMPX8632_TABLE.
124 {
125 // Define a temporary set of enum values based on low-level
126 // table entries.
127 enum _tmp_enum {
128#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
129 ICMPX8632_TABLE
130#undef X
131 };
132// Define a set of constants based on high-level table entries.
133#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
134 ICEINSTICMP_TABLE;
135#undef X
136// Define a set of constants based on low-level table entries,
137// and ensure the table entry keys are consistent.
138#define X(val, C_32, C1_64, C2_64, C3_64) \
139 static const int _table2_##val = _tmp_##val; \
140 STATIC_ASSERT(_table1_##val == _table2_##val);
141 ICMPX8632_TABLE;
142#undef X
143// Repeat the static asserts with respect to the high-level
144// table entries in case the high-level table has extra entries.
145#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
146 ICEINSTICMP_TABLE;
147#undef X
148 }
149
150 // Validate the enum values in ICETYPEX8632_TABLE.
151 {
152 // Define a temporary set of enum values based on low-level
153 // table entries.
154 enum _tmp_enum {
155#define X(tag, cvt, sdss, width) _tmp_##tag,
156 ICETYPEX8632_TABLE
157#undef X
158 };
159// Define a set of constants based on high-level table entries.
160#define X(tag, size, align, str) static const int _table1_##tag = tag;
161 ICETYPE_TABLE;
162#undef X
163// Define a set of constants based on low-level table entries,
164// and ensure the table entry keys are consistent.
165#define X(tag, cvt, sdss, width) \
166 static const int _table2_##tag = _tmp_##tag; \
167 STATIC_ASSERT(_table1_##tag == _table2_##tag);
168 ICETYPEX8632_TABLE;
169#undef X
170// Repeat the static asserts with respect to the high-level
171// table entries in case the high-level table has extra entries.
172#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
173 ICETYPE_TABLE;
174#undef X
175 }
176}
177
178} // end of anonymous namespace
179
180TargetX8632::TargetX8632(Cfg *Func)
181 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
182 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
183 PhysicalRegisters(VarList(Reg_NUM)) {
184 // TODO: Don't initialize IntegerRegisters and friends every time.
185 // Instead, initialize in some sort of static initializer for the
186 // class.
187 llvm::SmallBitVector IntegerRegisters(Reg_NUM);
188 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
189 llvm::SmallBitVector FloatRegisters(Reg_NUM);
190 llvm::SmallBitVector InvalidRegisters(Reg_NUM);
191 ScratchRegs.resize(Reg_NUM);
192#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
193 frameptr, isI8, isInt, isFP) \
194 IntegerRegisters[val] = isInt; \
195 IntegerRegistersI8[val] = isI8; \
196 FloatRegisters[val] = isFP; \
197 ScratchRegs[val] = scratch;
198 REGX8632_TABLE;
199#undef X
200 TypeToRegisterSet[IceType_void] = InvalidRegisters;
201 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
202 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
203 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
204 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
205 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
206 TypeToRegisterSet[IceType_f32] = FloatRegisters;
207 TypeToRegisterSet[IceType_f64] = FloatRegisters;
208}
209
210void TargetX8632::translateOm1() {
211 GlobalContext *Context = Func->getContext();
212 Ostream &Str = Context->getStrDump();
213 Timer T_placePhiLoads;
214 Func->placePhiLoads();
215 if (Func->hasError())
216 return;
217 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
218 Timer T_placePhiStores;
219 Func->placePhiStores();
220 if (Func->hasError())
221 return;
222 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
223 Timer T_deletePhis;
224 Func->deletePhis();
225 if (Func->hasError())
226 return;
227 T_deletePhis.printElapsedUs(Context, "deletePhis()");
228 if (Context->isVerbose()) {
229 Str << "================ After Phi lowering ================\n";
230 Func->dump();
231 }
232
233 Timer T_genCode;
234 Func->genCode();
235 if (Func->hasError())
236 return;
237 T_genCode.printElapsedUs(Context, "genCode()");
238 if (Context->isVerbose()) {
239 Str << "================ After initial x8632 codegen ================\n";
240 Func->dump();
241 }
242
243 Timer T_genFrame;
244 Func->genFrame();
245 if (Func->hasError())
246 return;
247 T_genFrame.printElapsedUs(Context, "genFrame()");
248 if (Context->isVerbose()) {
249 Str << "================ After stack frame mapping ================\n";
250 Func->dump();
251 }
252}
253
254IceString TargetX8632::RegNames[] = {
255#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
256 frameptr, isI8, isInt, isFP) \
257 name,
258 REGX8632_TABLE
259#undef X
260};
261
262Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
263 assert(RegNum < PhysicalRegisters.size());
264 Variable *Reg = PhysicalRegisters[RegNum];
265 if (Reg == NULL) {
266 CfgNode *Node = NULL; // NULL means multi-block lifetime
267 Reg = Func->makeVariable(IceType_i32, Node);
268 Reg->setRegNum(RegNum);
269 PhysicalRegisters[RegNum] = Reg;
270 }
271 return Reg;
272}
273
274IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
275 assert(RegNum < Reg_NUM);
276 static IceString RegNames8[] = {
277#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
278 frameptr, isI8, isInt, isFP) \
279 "" name8,
280 REGX8632_TABLE
281#undef X
282 };
283 static IceString RegNames16[] = {
284#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
285 frameptr, isI8, isInt, isFP) \
286 "" name16,
287 REGX8632_TABLE
288#undef X
289 };
290 switch (Ty) {
291 case IceType_i1:
292 case IceType_i8:
293 return RegNames8[RegNum];
294 case IceType_i16:
295 return RegNames16[RegNum];
296 default:
297 return RegNames[RegNum];
298 }
299}
300
301void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
302 Ostream &Str = Ctx->getStrEmit();
303 assert(Var->getLocalUseNode() == NULL ||
304 Var->getLocalUseNode() == Func->getCurrentNode());
305 if (Var->hasReg()) {
306 Str << getRegName(Var->getRegNum(), Var->getType());
307 return;
308 }
309 Str << InstX8632::getWidthString(Var->getType());
310 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
311 int32_t Offset = Var->getStackOffset() + getStackAdjustment();
312 if (Offset) {
313 if (Offset > 0)
314 Str << "+";
315 Str << Offset;
316 }
317 Str << "]";
318}
319
320// Helper function for addProlog(). Sets the frame offset for Arg,
321// updates InArgsSizeBytes according to Arg's width, and generates an
322// instruction to copy Arg into its assigned register if applicable.
323// For an I64 arg that has been split into Lo and Hi components, it
324// calls itself recursively on the components, taking care to handle
325// Lo first because of the little-endian architecture.
326void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
327 int32_t BasicFrameOffset,
328 int32_t &InArgsSizeBytes) {
329 Variable *Lo = Arg->getLo();
330 Variable *Hi = Arg->getHi();
331 Type Ty = Arg->getType();
332 if (Lo && Hi && Ty == IceType_i64) {
333 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
334 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
335 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
336 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
337 return;
338 }
339 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
340 if (Arg->hasReg()) {
341 assert(Ty != IceType_i64);
342 OperandX8632Mem *Mem = OperandX8632Mem::create(
343 Func, Ty, FramePtr,
344 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
345 _mov(Arg, Mem);
346 }
347 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
348}
349
350void TargetX8632::addProlog(CfgNode *Node) {
351 // If SimpleCoalescing is false, each variable without a register
352 // gets its own unique stack slot, which leads to large stack
353 // frames. If SimpleCoalescing is true, then each "global" variable
354 // without a register gets its own slot, but "local" variable slots
355 // are reused across basic blocks. E.g., if A and B are local to
356 // block 1 and C is local to block 2, then C may share a slot with A
357 // or B.
358 const bool SimpleCoalescing = true;
359 int32_t InArgsSizeBytes = 0;
360 int32_t RetIpSizeBytes = 4;
361 int32_t PreservedRegsSizeBytes = 0;
362 LocalsSizeBytes = 0;
363 Context.init(Node);
364 Context.setInsertPoint(Context.getCur());
365
366 // Determine stack frame offsets for each Variable without a
367 // register assignment. This can be done as one variable per stack
368 // slot. Or, do coalescing by running the register allocator again
369 // with an infinite set of registers (as a side effect, this gives
370 // variables a second chance at physical register assignment).
371 //
372 // A middle ground approach is to leverage sparsity and allocate one
373 // block of space on the frame for globals (variables with
374 // multi-block lifetime), and one block to share for locals
375 // (single-block lifetime).
376
377 llvm::SmallBitVector CalleeSaves =
378 getRegisterSet(RegSet_CalleeSave, RegSet_None);
379
380 int32_t GlobalsSize = 0;
381 std::vector<int> LocalsSize(Func->getNumNodes());
382
383 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
384 // LocalsSizeBytes.
385 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
386 const VarList &Variables = Func->getVariables();
387 const VarList &Args = Func->getArgs();
388 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
389 I != E; ++I) {
390 Variable *Var = *I;
391 if (Var->hasReg()) {
392 RegsUsed[Var->getRegNum()] = true;
393 continue;
394 }
395 // An argument passed on the stack already has a stack slot.
396 if (Var->getIsArg())
397 continue;
398 // A spill slot linked to a variable with a stack slot should reuse
399 // that stack slot.
400 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
401 if (Variable *Linked = Var->getPreferredRegister()) {
402 if (!Linked->hasReg())
403 continue;
404 }
405 }
406 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
407 if (SimpleCoalescing) {
408 if (Var->isMultiblockLife()) {
409 GlobalsSize += Increment;
410 } else {
411 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
412 LocalsSize[NodeIndex] += Increment;
413 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
414 LocalsSizeBytes = LocalsSize[NodeIndex];
415 }
416 } else {
417 LocalsSizeBytes += Increment;
418 }
419 }
420 LocalsSizeBytes += GlobalsSize;
421
422 // Add push instructions for preserved registers.
423 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
424 if (CalleeSaves[i] && RegsUsed[i]) {
425 PreservedRegsSizeBytes += 4;
426 const bool SuppressStackAdjustment = true;
427 _push(getPhysicalRegister(i), SuppressStackAdjustment);
428 }
429 }
430
431 // Generate "push ebp; mov ebp, esp"
432 if (IsEbpBasedFrame) {
433 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
434 .count() == 0);
435 PreservedRegsSizeBytes += 4;
436 Variable *ebp = getPhysicalRegister(Reg_ebp);
437 Variable *esp = getPhysicalRegister(Reg_esp);
438 const bool SuppressStackAdjustment = true;
439 _push(ebp, SuppressStackAdjustment);
440 _mov(ebp, esp);
441 }
442
443 // Generate "sub esp, LocalsSizeBytes"
444 if (LocalsSizeBytes)
445 _sub(getPhysicalRegister(Reg_esp),
446 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
447
448 resetStackAdjustment();
449
450 // Fill in stack offsets for args, and copy args into registers for
451 // those that were register-allocated. Args are pushed right to
452 // left, so Arg[0] is closest to the stack/frame pointer.
453 //
454 // TODO: Make this right for different width args, calling
455 // conventions, etc. For one thing, args passed in registers will
456 // need to be copied/shuffled to their home registers (the
457 // RegManager code may have some permutation logic to leverage),
458 // and if they have no home register, home space will need to be
459 // allocated on the stack to copy into.
460 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
461 int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
462 if (!IsEbpBasedFrame)
463 BasicFrameOffset += LocalsSizeBytes;
464 for (SizeT i = 0; i < Args.size(); ++i) {
465 Variable *Arg = Args[i];
466 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
467 }
468
469 // Fill in stack offsets for locals.
470 int32_t TotalGlobalsSize = GlobalsSize;
471 GlobalsSize = 0;
472 LocalsSize.assign(LocalsSize.size(), 0);
473 int32_t NextStackOffset = 0;
474 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
475 I != E; ++I) {
476 Variable *Var = *I;
477 if (Var->hasReg()) {
478 RegsUsed[Var->getRegNum()] = true;
479 continue;
480 }
481 if (Var->getIsArg())
482 continue;
483 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
484 if (Variable *Linked = Var->getPreferredRegister()) {
485 if (!Linked->hasReg()) {
486 // TODO: Make sure Linked has already been assigned a stack
487 // slot.
488 Var->setStackOffset(Linked->getStackOffset());
489 continue;
490 }
491 }
492 }
493 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
494 if (SimpleCoalescing) {
495 if (Var->isMultiblockLife()) {
496 GlobalsSize += Increment;
497 NextStackOffset = GlobalsSize;
498 } else {
499 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
500 LocalsSize[NodeIndex] += Increment;
501 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
502 }
503 } else {
504 NextStackOffset += Increment;
505 }
506 if (IsEbpBasedFrame)
507 Var->setStackOffset(-NextStackOffset);
508 else
509 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
510 }
511 this->FrameSizeLocals = NextStackOffset;
512 this->HasComputedFrame = true;
513
514 if (Func->getContext()->isVerbose(IceV_Frame)) {
515 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
516 << "\n"
517 << "InArgsSizeBytes=" << InArgsSizeBytes
518 << "\n"
519 << "PreservedRegsSizeBytes="
520 << PreservedRegsSizeBytes << "\n";
521 }
522}
523
524void TargetX8632::addEpilog(CfgNode *Node) {
525 InstList &Insts = Node->getInsts();
526 InstList::reverse_iterator RI, E;
527 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
528 if (llvm::isa<InstX8632Ret>(*RI))
529 break;
530 }
531 if (RI == E)
532 return;
533
534 // Convert the reverse_iterator position into its corresponding
535 // (forward) iterator position.
536 InstList::iterator InsertPoint = RI.base();
537 --InsertPoint;
538 Context.init(Node);
539 Context.setInsertPoint(InsertPoint);
540
541 Variable *esp = getPhysicalRegister(Reg_esp);
542 if (IsEbpBasedFrame) {
543 Variable *ebp = getPhysicalRegister(Reg_ebp);
544 _mov(esp, ebp);
545 _pop(ebp);
546 } else {
547 // add esp, LocalsSizeBytes
548 if (LocalsSizeBytes)
549 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
550 }
551
552 // Add pop instructions for preserved registers.
553 llvm::SmallBitVector CalleeSaves =
554 getRegisterSet(RegSet_CalleeSave, RegSet_None);
555 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
556 SizeT j = CalleeSaves.size() - i - 1;
557 if (j == Reg_ebp && IsEbpBasedFrame)
558 continue;
559 if (CalleeSaves[j] && RegsUsed[j]) {
560 _pop(getPhysicalRegister(j));
561 }
562 }
563}
564
Jim Stichnothf61d5b22014-05-23 13:31:24 -0700565template <typename T> struct PoolTypeConverter {};
566
567template <> struct PoolTypeConverter<float> {
568 typedef float PrimitiveFpType;
569 typedef uint32_t PrimitiveIntType;
570 typedef ConstantFloat IceType;
571 static const Type Ty = IceType_f32;
572 static const char *TypeName;
573 static const char *AsmTag;
574 static const char *PrintfString;
575};
576const char *PoolTypeConverter<float>::TypeName = "float";
577const char *PoolTypeConverter<float>::AsmTag = ".long";
578const char *PoolTypeConverter<float>::PrintfString = "0x%x";
579
580template <> struct PoolTypeConverter<double> {
581 typedef double PrimitiveFpType;
582 typedef uint64_t PrimitiveIntType;
583 typedef ConstantDouble IceType;
584 static const Type Ty = IceType_f64;
585 static const char *TypeName;
586 static const char *AsmTag;
587 static const char *PrintfString;
588};
589const char *PoolTypeConverter<double>::TypeName = "double";
590const char *PoolTypeConverter<double>::AsmTag = ".quad";
591const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
592
593template <typename T> void TargetX8632::emitConstantPool() const {
594 Ostream &Str = Ctx->getStrEmit();
595 Type Ty = T::Ty;
596 SizeT Align = typeAlignInBytes(Ty);
597 ConstantList Pool = Ctx->getConstantPool(Ty);
598
599 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
600 << "\n";
601 Str << "\t.align\t" << Align << "\n";
602 for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
603 ++I) {
604 typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
605 typename T::PrimitiveFpType Value = Const->getValue();
606 // Use memcpy() to copy bits from Value into RawValue in a way
607 // that avoids breaking strict-aliasing rules.
608 typename T::PrimitiveIntType RawValue;
609 memcpy(&RawValue, &Value, sizeof(Value));
610 char buf[30];
611 int CharsPrinted =
612 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
613 assert(CharsPrinted >= 0 &&
614 (size_t)CharsPrinted < llvm::array_lengthof(buf));
615 (void)CharsPrinted; // avoid warnings if asserts are disabled
616 Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
617 Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
618 << Value << "\n";
619 }
620}
621
622void TargetX8632::emitConstants() const {
623 emitConstantPool<PoolTypeConverter<float> >();
624 emitConstantPool<PoolTypeConverter<double> >();
625
626 // No need to emit constants from the int pool since (for x86) they
627 // are embedded as immediates in the instructions.
628}
629
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700630void TargetX8632::split64(Variable *Var) {
631 switch (Var->getType()) {
632 default:
633 return;
634 case IceType_i64:
635 // TODO: Only consider F64 if we need to push each half when
636 // passing as an argument to a function call. Note that each half
637 // is still typed as I32.
638 case IceType_f64:
639 break;
640 }
641 Variable *Lo = Var->getLo();
642 Variable *Hi = Var->getHi();
643 if (Lo) {
644 assert(Hi);
645 return;
646 }
647 assert(Hi == NULL);
648 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
649 Var->getName() + "__lo");
650 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
651 Var->getName() + "__hi");
652 Var->setLoHi(Lo, Hi);
653 if (Var->getIsArg()) {
654 Lo->setIsArg(Func);
655 Hi->setIsArg(Func);
656 }
657}
658
659Operand *TargetX8632::loOperand(Operand *Operand) {
660 assert(Operand->getType() == IceType_i64);
661 if (Operand->getType() != IceType_i64)
662 return Operand;
663 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
664 split64(Var);
665 return Var->getLo();
666 }
667 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
668 uint64_t Mask = (1ull << 32) - 1;
669 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
670 }
671 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
672 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
673 Mem->getOffset(), Mem->getIndex(),
674 Mem->getShift());
675 }
676 llvm_unreachable("Unsupported operand type");
677 return NULL;
678}
679
680Operand *TargetX8632::hiOperand(Operand *Operand) {
681 assert(Operand->getType() == IceType_i64);
682 if (Operand->getType() != IceType_i64)
683 return Operand;
684 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
685 split64(Var);
686 return Var->getHi();
687 }
688 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
689 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
690 }
691 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
692 Constant *Offset = Mem->getOffset();
693 if (Offset == NULL)
694 Offset = Ctx->getConstantInt(IceType_i32, 4);
695 else if (ConstantInteger *IntOffset =
696 llvm::dyn_cast<ConstantInteger>(Offset)) {
697 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
698 } else if (ConstantRelocatable *SymOffset =
699 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
700 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
701 SymOffset->getName());
702 }
703 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
704 Mem->getIndex(), Mem->getShift());
705 }
706 llvm_unreachable("Unsupported operand type");
707 return NULL;
708}
709
710llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
711 RegSetMask Exclude) const {
712 llvm::SmallBitVector Registers(Reg_NUM);
713
714#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
715 frameptr, isI8, isInt, isFP) \
716 if (scratch && (Include & RegSet_CallerSave)) \
717 Registers[val] = true; \
718 if (preserved && (Include & RegSet_CalleeSave)) \
719 Registers[val] = true; \
720 if (stackptr && (Include & RegSet_StackPointer)) \
721 Registers[val] = true; \
722 if (frameptr && (Include & RegSet_FramePointer)) \
723 Registers[val] = true; \
724 if (scratch && (Exclude & RegSet_CallerSave)) \
725 Registers[val] = false; \
726 if (preserved && (Exclude & RegSet_CalleeSave)) \
727 Registers[val] = false; \
728 if (stackptr && (Exclude & RegSet_StackPointer)) \
729 Registers[val] = false; \
730 if (frameptr && (Exclude & RegSet_FramePointer)) \
731 Registers[val] = false;
732
733 REGX8632_TABLE
734
735#undef X
736
737 return Registers;
738}
739
740void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
741 IsEbpBasedFrame = true;
742 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
743 // the number of adjustments of esp, etc.
744 Variable *esp = getPhysicalRegister(Reg_esp);
745 Operand *TotalSize = legalize(Inst->getSizeInBytes());
746 Variable *Dest = Inst->getDest();
747 _sub(esp, TotalSize);
748 _mov(Dest, esp);
749}
750
751void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
752 Variable *Dest = Inst->getDest();
753 Operand *Src0 = legalize(Inst->getSrc(0));
754 Operand *Src1 = legalize(Inst->getSrc(1));
755 if (Dest->getType() == IceType_i64) {
756 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
757 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
758 Operand *Src0Lo = loOperand(Src0);
759 Operand *Src0Hi = hiOperand(Src0);
760 Operand *Src1Lo = loOperand(Src1);
761 Operand *Src1Hi = hiOperand(Src1);
762 Variable *T_Lo = NULL, *T_Hi = NULL;
763 switch (Inst->getOp()) {
764 case InstArithmetic::Add:
765 _mov(T_Lo, Src0Lo);
766 _add(T_Lo, Src1Lo);
767 _mov(DestLo, T_Lo);
768 _mov(T_Hi, Src0Hi);
769 _adc(T_Hi, Src1Hi);
770 _mov(DestHi, T_Hi);
771 break;
772 case InstArithmetic::And:
773 _mov(T_Lo, Src0Lo);
774 _and(T_Lo, Src1Lo);
775 _mov(DestLo, T_Lo);
776 _mov(T_Hi, Src0Hi);
777 _and(T_Hi, Src1Hi);
778 _mov(DestHi, T_Hi);
779 break;
780 case InstArithmetic::Or:
781 _mov(T_Lo, Src0Lo);
782 _or(T_Lo, Src1Lo);
783 _mov(DestLo, T_Lo);
784 _mov(T_Hi, Src0Hi);
785 _or(T_Hi, Src1Hi);
786 _mov(DestHi, T_Hi);
787 break;
788 case InstArithmetic::Xor:
789 _mov(T_Lo, Src0Lo);
790 _xor(T_Lo, Src1Lo);
791 _mov(DestLo, T_Lo);
792 _mov(T_Hi, Src0Hi);
793 _xor(T_Hi, Src1Hi);
794 _mov(DestHi, T_Hi);
795 break;
796 case InstArithmetic::Sub:
797 _mov(T_Lo, Src0Lo);
798 _sub(T_Lo, Src1Lo);
799 _mov(DestLo, T_Lo);
800 _mov(T_Hi, Src0Hi);
801 _sbb(T_Hi, Src1Hi);
802 _mov(DestHi, T_Hi);
803 break;
804 case InstArithmetic::Mul: {
805 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
806 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
807 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
808 // gcc does the following:
809 // a=b*c ==>
810 // t1 = b.hi; t1 *=(imul) c.lo
811 // t2 = c.hi; t2 *=(imul) b.lo
812 // t3:eax = b.lo
813 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
814 // a.lo = t4.lo
815 // t4.hi += t1
816 // t4.hi += t2
817 // a.hi = t4.hi
818 _mov(T_1, Src0Hi);
819 _imul(T_1, Src1Lo);
820 _mov(T_2, Src1Hi);
821 _imul(T_2, Src0Lo);
822 _mov(T_3, Src0Lo, Reg_eax);
823 _mul(T_4Lo, T_3, Src1Lo);
824 // The mul instruction produces two dest variables, edx:eax. We
825 // create a fake definition of edx to account for this.
826 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
827 _mov(DestLo, T_4Lo);
828 _add(T_4Hi, T_1);
829 _add(T_4Hi, T_2);
830 _mov(DestHi, T_4Hi);
831 } break;
832 case InstArithmetic::Shl: {
833 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
834 // gcc does the following:
835 // a=b<<c ==>
836 // t1:ecx = c.lo & 0xff
837 // t2 = b.lo
838 // t3 = b.hi
839 // t3 = shld t3, t2, t1
840 // t2 = shl t2, t1
841 // test t1, 0x20
842 // je L1
843 // use(t3)
844 // t3 = t2
845 // t2 = 0
846 // L1:
847 // a.lo = t2
848 // a.hi = t3
849 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
850 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
851 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
852 InstX8632Label *Label = InstX8632Label::create(Func, this);
853 _mov(T_1, Src1Lo, Reg_ecx);
854 _mov(T_2, Src0Lo);
855 _mov(T_3, Src0Hi);
856 _shld(T_3, T_2, T_1);
857 _shl(T_2, T_1);
858 _test(T_1, BitTest);
859 _br(InstX8632Br::Br_e, Label);
860 // Because of the intra-block control flow, we need to fake a use
861 // of T_3 to prevent its earlier definition from being dead-code
862 // eliminated in the presence of its later definition.
863 Context.insert(InstFakeUse::create(Func, T_3));
864 _mov(T_3, T_2);
865 _mov(T_2, Zero);
866 Context.insert(Label);
867 _mov(DestLo, T_2);
868 _mov(DestHi, T_3);
869 } break;
870 case InstArithmetic::Lshr: {
871 // a=b>>c (unsigned) ==>
872 // t1:ecx = c.lo & 0xff
873 // t2 = b.lo
874 // t3 = b.hi
875 // t2 = shrd t2, t3, t1
876 // t3 = shr t3, t1
877 // test t1, 0x20
878 // je L1
879 // use(t2)
880 // t2 = t3
881 // t3 = 0
882 // L1:
883 // a.lo = t2
884 // a.hi = t3
885 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
886 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
887 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
888 InstX8632Label *Label = InstX8632Label::create(Func, this);
889 _mov(T_1, Src1Lo, Reg_ecx);
890 _mov(T_2, Src0Lo);
891 _mov(T_3, Src0Hi);
892 _shrd(T_2, T_3, T_1);
893 _shr(T_3, T_1);
894 _test(T_1, BitTest);
895 _br(InstX8632Br::Br_e, Label);
896 // Because of the intra-block control flow, we need to fake a use
897 // of T_3 to prevent its earlier definition from being dead-code
898 // eliminated in the presence of its later definition.
899 Context.insert(InstFakeUse::create(Func, T_2));
900 _mov(T_2, T_3);
901 _mov(T_3, Zero);
902 Context.insert(Label);
903 _mov(DestLo, T_2);
904 _mov(DestHi, T_3);
905 } break;
906 case InstArithmetic::Ashr: {
907 // a=b>>c (signed) ==>
908 // t1:ecx = c.lo & 0xff
909 // t2 = b.lo
910 // t3 = b.hi
911 // t2 = shrd t2, t3, t1
912 // t3 = sar t3, t1
913 // test t1, 0x20
914 // je L1
915 // use(t2)
916 // t2 = t3
917 // t3 = sar t3, 0x1f
918 // L1:
919 // a.lo = t2
920 // a.hi = t3
921 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
922 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
923 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
924 InstX8632Label *Label = InstX8632Label::create(Func, this);
925 _mov(T_1, Src1Lo, Reg_ecx);
926 _mov(T_2, Src0Lo);
927 _mov(T_3, Src0Hi);
928 _shrd(T_2, T_3, T_1);
929 _sar(T_3, T_1);
930 _test(T_1, BitTest);
931 _br(InstX8632Br::Br_e, Label);
932 // Because of the intra-block control flow, we need to fake a use
933 // of T_3 to prevent its earlier definition from being dead-code
934 // eliminated in the presence of its later definition.
935 Context.insert(InstFakeUse::create(Func, T_2));
936 _mov(T_2, T_3);
937 _sar(T_3, SignExtend);
938 Context.insert(Label);
939 _mov(DestLo, T_2);
940 _mov(DestHi, T_3);
941 } break;
942 case InstArithmetic::Udiv: {
943 const SizeT MaxSrcs = 2;
944 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
945 Call->addArg(Inst->getSrc(0));
946 Call->addArg(Inst->getSrc(1));
947 lowerCall(Call);
948 } break;
949 case InstArithmetic::Sdiv: {
950 const SizeT MaxSrcs = 2;
951 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
952 Call->addArg(Inst->getSrc(0));
953 Call->addArg(Inst->getSrc(1));
954 lowerCall(Call);
955 } break;
956 case InstArithmetic::Urem: {
957 const SizeT MaxSrcs = 2;
958 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
959 Call->addArg(Inst->getSrc(0));
960 Call->addArg(Inst->getSrc(1));
961 lowerCall(Call);
962 } break;
963 case InstArithmetic::Srem: {
964 const SizeT MaxSrcs = 2;
965 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
966 Call->addArg(Inst->getSrc(0));
967 Call->addArg(Inst->getSrc(1));
968 lowerCall(Call);
969 } break;
970 case InstArithmetic::Fadd:
971 case InstArithmetic::Fsub:
972 case InstArithmetic::Fmul:
973 case InstArithmetic::Fdiv:
974 case InstArithmetic::Frem:
975 llvm_unreachable("FP instruction with i64 type");
976 break;
977 }
978 } else { // Dest->getType() != IceType_i64
979 Variable *T_edx = NULL;
980 Variable *T = NULL;
981 switch (Inst->getOp()) {
982 case InstArithmetic::Add:
983 _mov(T, Src0);
984 _add(T, Src1);
985 _mov(Dest, T);
986 break;
987 case InstArithmetic::And:
988 _mov(T, Src0);
989 _and(T, Src1);
990 _mov(Dest, T);
991 break;
992 case InstArithmetic::Or:
993 _mov(T, Src0);
994 _or(T, Src1);
995 _mov(Dest, T);
996 break;
997 case InstArithmetic::Xor:
998 _mov(T, Src0);
999 _xor(T, Src1);
1000 _mov(Dest, T);
1001 break;
1002 case InstArithmetic::Sub:
1003 _mov(T, Src0);
1004 _sub(T, Src1);
1005 _mov(Dest, T);
1006 break;
1007 case InstArithmetic::Mul:
1008 // TODO: Optimize for llvm::isa<Constant>(Src1)
1009 // TODO: Strength-reduce multiplications by a constant,
1010 // particularly -1 and powers of 2. Advanced: use lea to
1011 // multiply by 3, 5, 9.
1012 //
1013 // The 8-bit version of imul only allows the form "imul r/m8"
1014 // where T must be in eax.
1015 if (Dest->getType() == IceType_i8)
1016 _mov(T, Src0, Reg_eax);
1017 else
1018 _mov(T, Src0);
1019 _imul(T, Src1);
1020 _mov(Dest, T);
1021 break;
1022 case InstArithmetic::Shl:
1023 _mov(T, Src0);
1024 if (!llvm::isa<Constant>(Src1))
1025 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1026 _shl(T, Src1);
1027 _mov(Dest, T);
1028 break;
1029 case InstArithmetic::Lshr:
1030 _mov(T, Src0);
1031 if (!llvm::isa<Constant>(Src1))
1032 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1033 _shr(T, Src1);
1034 _mov(Dest, T);
1035 break;
1036 case InstArithmetic::Ashr:
1037 _mov(T, Src0);
1038 if (!llvm::isa<Constant>(Src1))
1039 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1040 _sar(T, Src1);
1041 _mov(Dest, T);
1042 break;
1043 case InstArithmetic::Udiv:
1044 if (Dest->getType() == IceType_i8) {
1045 Variable *T_ah = NULL;
1046 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
1047 _mov(T, Src0, Reg_eax);
1048 _mov(T_ah, Zero, Reg_ah);
1049 _div(T, Src1, T_ah);
1050 _mov(Dest, T);
1051 } else {
1052 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1053 _mov(T, Src0, Reg_eax);
1054 _mov(T_edx, Zero, Reg_edx);
1055 _div(T, Src1, T_edx);
1056 _mov(Dest, T);
1057 }
1058 break;
1059 case InstArithmetic::Sdiv:
1060 T_edx = makeReg(IceType_i32, Reg_edx);
1061 _mov(T, Src0, Reg_eax);
1062 _cdq(T_edx, T);
1063 _idiv(T, Src1, T_edx);
1064 _mov(Dest, T);
1065 break;
1066 case InstArithmetic::Urem:
1067 if (Dest->getType() == IceType_i8) {
1068 Variable *T_ah = NULL;
1069 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
1070 _mov(T, Src0, Reg_eax);
1071 _mov(T_ah, Zero, Reg_ah);
1072 _div(T_ah, Src1, T);
1073 _mov(Dest, T_ah);
1074 } else {
1075 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1076 _mov(T_edx, Zero, Reg_edx);
1077 _mov(T, Src0, Reg_eax);
1078 _div(T_edx, Src1, T);
1079 _mov(Dest, T_edx);
1080 }
1081 break;
1082 case InstArithmetic::Srem:
1083 T_edx = makeReg(IceType_i32, Reg_edx);
1084 _mov(T, Src0, Reg_eax);
1085 _cdq(T_edx, T);
1086 _idiv(T_edx, Src1, T);
1087 _mov(Dest, T_edx);
1088 break;
1089 case InstArithmetic::Fadd:
1090 _mov(T, Src0);
1091 _addss(T, Src1);
1092 _mov(Dest, T);
1093 break;
1094 case InstArithmetic::Fsub:
1095 _mov(T, Src0);
1096 _subss(T, Src1);
1097 _mov(Dest, T);
1098 break;
1099 case InstArithmetic::Fmul:
1100 _mov(T, Src0);
1101 _mulss(T, Src1);
1102 _mov(Dest, T);
1103 break;
1104 case InstArithmetic::Fdiv:
1105 _mov(T, Src0);
1106 _divss(T, Src1);
1107 _mov(Dest, T);
1108 break;
1109 case InstArithmetic::Frem: {
1110 const SizeT MaxSrcs = 2;
1111 Type Ty = Dest->getType();
1112 InstCall *Call =
1113 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
1114 Call->addArg(Src0);
1115 Call->addArg(Src1);
1116 return lowerCall(Call);
1117 } break;
1118 }
1119 }
1120}
1121
1122void TargetX8632::lowerAssign(const InstAssign *Inst) {
1123 Variable *Dest = Inst->getDest();
1124 Operand *Src0 = Inst->getSrc(0);
1125 assert(Dest->getType() == Src0->getType());
1126 if (Dest->getType() == IceType_i64) {
1127 Src0 = legalize(Src0);
1128 Operand *Src0Lo = loOperand(Src0);
1129 Operand *Src0Hi = hiOperand(Src0);
1130 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1131 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1132 Variable *T_Lo = NULL, *T_Hi = NULL;
1133 _mov(T_Lo, Src0Lo);
1134 _mov(DestLo, T_Lo);
1135 _mov(T_Hi, Src0Hi);
1136 _mov(DestHi, T_Hi);
1137 } else {
1138 const bool AllowOverlap = true;
1139 // RI is either a physical register or an immediate.
1140 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
1141 _mov(Dest, RI);
1142 }
1143}
1144
1145void TargetX8632::lowerBr(const InstBr *Inst) {
1146 if (Inst->isUnconditional()) {
1147 _br(Inst->getTargetUnconditional());
1148 } else {
1149 Operand *Src0 = legalize(Inst->getCondition());
1150 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1151 _cmp(Src0, Zero);
1152 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1153 }
1154}
1155
// Lowers a call instruction: pushes arguments right to left, emits
// the call, adjusts esp afterwards, models the caller-save register
// clobbers, and copies the return value out of eax / edx:eax / st(0)
// as appropriate for the destination type.
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // 64-bit integers are pushed as two 32-bit halves, high half
      // first.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions. This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround. Force the operand into a (xmm) register, and
      // then push the register. An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Small integer results come back in eax.
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      // 64-bit results come back in the edx:eax pair.
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  // The call instruction itself only defines eax, so model the
  // definition of edx with a fake def.
  if (edx)
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment.
  if (Dest && eax) {
    if (edx) {
      // 64-bit destination: split Dest and copy the register pair
      // into its halves, hinting eax/edx as preferred registers.
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1267
// Lowers a cast instruction to the appropriate x86 conversion
// sequence, or to a runtime helper call for conversions that x86-32
// can't express directly (mostly 64-bit integer <-> floating point).
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand. If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // An i32 source needs no widening; use a plain mov instead of
      // movsx.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      // Arithmetic shift right by 31 replicates the sign bit across
      // the entire high word.
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      // An i32 source needs no widening; use a plain mov instead of
      // movzx.
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask to a single bit so only bit 0 of the i1's container
      // survives.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncating a 64-bit value just takes its low half.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers. SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word. This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64() is also reached when Dest is i32 --
      // confirm that split64 tolerates a non-i64 destination.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64. Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    // A same-type bitcast is just an assignment.
    if (Dest->getType() == Src0RM->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      // The round trip through the zero-weight spill variable moves
      // the bits between the integer and FP register files via
      // memory.
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      // The spill slot makes the f64 value addressable so its two
      // 32-bit halves can be read via VariableSplit operands.
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // Provide a fake definition of Spill before its halves are
      // written through the VariableSplit operands below.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1542
1543void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1544 Operand *Src0 = Inst->getSrc(0);
1545 Operand *Src1 = Inst->getSrc(1);
1546 Variable *Dest = Inst->getDest();
1547 // Lowering a = fcmp cond, b, c
1548 // ucomiss b, c /* only if C1 != Br_None */
1549 // /* but swap b,c order if SwapOperands==true */
1550 // mov a, <default>
1551 // j<C1> label /* only if C1 != Br_None */
1552 // j<C2> label /* only if C2 != Br_None */
1553 // FakeUse(a) /* only if C1 != Br_None */
1554 // mov a, !<default> /* only if C1 != Br_None */
1555 // label: /* only if C1 != Br_None */
1556 InstFcmp::FCond Condition = Inst->getCondition();
1557 size_t Index = static_cast<size_t>(Condition);
1558 assert(Index < TableFcmpSize);
1559 if (TableFcmp[Index].SwapOperands) {
1560 Operand *Tmp = Src0;
1561 Src0 = Src1;
1562 Src1 = Tmp;
1563 }
1564 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
1565 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
1566 if (HasC1) {
1567 Src0 = legalize(Src0);
1568 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
1569 Variable *T = NULL;
1570 _mov(T, Src0);
1571 _ucomiss(T, Src1RM);
1572 }
1573 Constant *Default =
1574 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
1575 _mov(Dest, Default);
1576 if (HasC1) {
1577 InstX8632Label *Label = InstX8632Label::create(Func, this);
1578 _br(TableFcmp[Index].C1, Label);
1579 if (HasC2) {
1580 _br(TableFcmp[Index].C2, Label);
1581 }
1582 Context.insert(InstFakeUse::create(Func, Dest));
1583 Constant *NonDefault =
1584 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
1585 _mov(Dest, NonDefault);
1586 Context.insert(Label);
1587 }
1588}
1589
// Lowers an integer compare whose boolean result is materialized
// into Dest, using intra-block branches with fake uses to protect
// the double assignment from dead-code elimination.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // 64-bit (in)equality: compare the halves separately, branching
      // to the label as soon as a half differs. The initial mov holds
      // the "halves differ" answer; the assignment after the fake use
      // overwrites it with the "halves equal" answer.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // Keep the first assignment to Dest from being dead-code
      // eliminated by the later assignment.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // 64-bit ordered comparison: try to decide from the high halves
      // (C1 => true, C2 => false); otherwise fall through to compare
      // the low halves (C3 => true). The conditions come from
      // TableIcmp64.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      // Keep the first assignment to Dest from being dead-code
      // eliminated by the later assignment.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
  // a physical register. (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  // Keep the first assignment to Dest from being dead-code
  // eliminated by the later assignment.
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1655
1656void TargetX8632::lowerLoad(const InstLoad *Inst) {
1657 // A Load instruction can be treated the same as an Assign
1658 // instruction, after the source operand is transformed into an
1659 // OperandX8632Mem operand. Note that the address mode
1660 // optimization already creates an OperandX8632Mem operand, so it
1661 // doesn't need another level of transformation.
1662 Type Ty = Inst->getDest()->getType();
1663 Operand *Src0 = Inst->getSourceAddress();
1664 // Address mode optimization already creates an OperandX8632Mem
1665 // operand, so it doesn't need another level of transformation.
1666 if (!llvm::isa<OperandX8632Mem>(Src0)) {
1667 Variable *Base = llvm::dyn_cast<Variable>(Src0);
1668 Constant *Offset = llvm::dyn_cast<Constant>(Src0);
1669 assert(Base || Offset);
1670 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
1671 }
1672
1673 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
1674 lowerAssign(Assign);
1675}
1676
// Phi instructions are not expected in the regular instruction list
// at lowering time; encountering one here is a translation error.
void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}
1680
// Lowers a return: places the return value (if any) in eax,
// edx:eax, or st(0) depending on its type, then emits ret.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      // 64-bit result goes in the edx:eax pair. eax becomes the ret
      // instruction's source; edx gets a fake use so it stays live
      // through the ret.
      Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (Src0->getType() == IceType_f32 ||
               Src0->getType() == IceType_f64) {
      // Floating-point results are returned on the x87 stack; load
      // the value into st(0).
      _fld(Src0);
    } else {
      // Integer results are returned in eax.
      _mov(Reg, Src0, Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
1706
// Lowers a select (a = d ? b : c) with intra-block control flow:
// assign the true operand unconditionally, then branch over a
// reassignment with the false operand when the condition holds.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    // The movs between the cmp and the br are register/immediate
    // moves, which do not modify the x86 flags.
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // The fake uses keep the earlier definitions of DestLo/DestHi
    // from being dead-code eliminated in the presence of the later
    // definitions.
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    // The fake use keeps the first assignment to Dest from being
    // dead-code eliminated by the second assignment.
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
1745
1746void TargetX8632::lowerStore(const InstStore *Inst) {
1747 Operand *Value = Inst->getData();
1748 Operand *Addr = Inst->getAddr();
1749 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
1750 // Address mode optimization already creates an OperandX8632Mem
1751 // operand, so it doesn't need another level of transformation.
1752 if (!NewAddr) {
1753 // The address will be either a constant (which represents a global
1754 // variable) or a variable, so either the Base or Offset component
1755 // of the OperandX8632Mem will be set.
1756 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1757 Constant *Offset = llvm::dyn_cast<Constant>(Addr);
1758 assert(Base || Offset);
1759 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
1760 }
1761 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
1762
1763 if (NewAddr->getType() == IceType_i64) {
1764 Value = legalize(Value);
1765 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
1766 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
1767 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
1768 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
1769 } else {
1770 Value = legalize(Value, Legal_Reg | Legal_Imm, true);
1771 _store(Value, NewAddr);
1772 }
1773}
1774
1775void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
1776 // This implements the most naive possible lowering.
1777 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
1778 Operand *Src0 = Inst->getComparison();
1779 SizeT NumCases = Inst->getNumCases();
1780 // OK, we'll be slightly less naive by forcing Src into a physical
1781 // register if there are 2 or more uses.
1782 if (NumCases >= 2)
1783 Src0 = legalizeToVar(Src0, true);
1784 else
1785 Src0 = legalize(Src0, Legal_All, true);
1786 for (SizeT I = 0; I < NumCases; ++I) {
1787 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
1788 _cmp(Src0, Value);
1789 _br(InstX8632Br::Br_e, Inst->getLabel(I));
1790 }
1791
1792 _br(Inst->getLabelDefault());
1793}
1794
1795void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
1796 const SizeT MaxSrcs = 0;
1797 Variable *Dest = NULL;
1798 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
1799 lowerCall(Call);
1800}
1801
// Turns From into an operand of one of the kinds permitted by
// Allowed (register, immediate, memory), emitting mov instructions
// as needed; if RegNum names a specific physical register, the
// result is forced into it. Returns the (possibly new) operand.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds. (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    // Rebuild the Mem operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    if (!(Allowed & Legal_Mem)) {
      // Memory operands are disallowed; load the value into a
      // register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (!(Allowed & Legal_Imm)) {
      // Immediates are disallowed; materialize the constant into a
      // register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        // No specific register was requested, so hint the register
        // allocator to assign Reg the same register as Var.
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
1866
1867// Provide a trivial wrapper to legalize() for this common usage.
1868Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
1869 int32_t RegNum) {
1870 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
1871}
1872
1873Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
1874 Variable *Reg = Func->makeVariable(Type, Context.getNode());
1875 if (RegNum == Variable::NoRegister)
1876 Reg->setWeightInfinite();
1877 else
1878 Reg->setRegNum(RegNum);
1879 return Reg;
1880}
1881
// postLower() implements the simplest possible register allocation for
// the "fast" (Opt_m1) target: a first pass removes pre-colored
// registers from the available pool, and a second pass greedily colors
// the remaining infinite-weight variables from that pool.
void TargetX8632::postLower() {
  // Only the minimal optimization level uses this allocator.
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers. TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    // NOTE(review): FakeKill instructions are skipped here — presumably
    // their sources don't represent real register uses; confirm against
    // InstFakeKill's semantics.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    SizeT VarIndex = 0;
    // Walk every Variable referenced by every source operand.
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        // Pre-colored register: remove it from the allocatable pool.
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    SizeT VarIndex = 0;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        Variable *Var = Src->getVar(J);
        // Only color variables that still need a register and were
        // marked as requiring one (infinite weight).
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        // Restrict candidates to registers valid for this type.
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        // Greedily take the first available register and mark it used.
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
1945
Jim Stichnothf61d5b22014-05-23 13:31:24 -07001946template <> void ConstantFloat::emit(const Cfg *Func) const {
1947 Ostream &Str = Func->getContext()->getStrEmit();
1948 // It would be better to prefix with ".L$" instead of "L$", but
1949 // llvm-mc doesn't parse "dword ptr [.L$foo]".
1950 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
1951}
1952
1953template <> void ConstantDouble::emit(const Cfg *Func) const {
1954 Ostream &Str = Func->getContext()->getStrEmit();
1955 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
1956}
1957
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001958} // end of namespace Ice