blob: 6a09276f0b52f838ffa479345a3c99386b1140ab [file] [log] [blame]
//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction. It also implements
// TargetX8632Fast::postLower() which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//

18#include "IceDefs.h"
19#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceInstX8632.h"
22#include "IceOperand.h"
23#include "IceTargetLoweringX8632.def"
24#include "IceTargetLoweringX8632.h"
25
26namespace Ice {
27
28namespace {
29
30// The following table summarizes the logic for lowering the fcmp instruction.
31// There is one table entry for each of the 16 conditions. A comment in
32// lowerFcmp() describes the lowering template. In the most general case, there
33// is a compare followed by two conditional branches, because some fcmp
34// conditions don't map to a single x86 conditional branch. However, in many
35// cases it is possible to swap the operands in the comparison and have a single
36// conditional branch. Since it's quite tedious to validate the table by hand,
37// good execution tests are helpful.
38
39const struct TableFcmp_ {
40 uint32_t Default;
41 bool SwapOperands;
42 InstX8632Br::BrCond C1, C2;
43} TableFcmp[] = {
44#define X(val, dflt, swap, C1, C2) \
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
46 ,
47 FCMPX8632_TABLE
48#undef X
49 };
50const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51
52// The following table summarizes the logic for lowering the icmp instruction
53// for i32 and narrower types. Each icmp condition has a clear mapping to an
54// x86 conditional branch instruction.
55
56const struct TableIcmp32_ {
57 InstX8632Br::BrCond Mapping;
58} TableIcmp32[] = {
59#define X(val, C_32, C1_64, C2_64, C3_64) \
60 { InstX8632Br::C_32 } \
61 ,
62 ICMPX8632_TABLE
63#undef X
64 };
65const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67// The following table summarizes the logic for lowering the icmp instruction
68// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69// conditional branches are needed. For the other conditions, three separate
70// conditional branches are needed.
71const struct TableIcmp64_ {
72 InstX8632Br::BrCond C1, C2, C3;
73} TableIcmp64[] = {
74#define X(val, C_32, C1_64, C2_64, C3_64) \
75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
76 ,
77 ICMPX8632_TABLE
78#undef X
79 };
80const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86}
87
88// In some cases, there are x-macros tables for both high-level and
89// low-level instructions/operands that use the same enum key value.
90// The tables are kept separate to maintain a proper separation
91// between abstraction layers. There is a risk that the tables
92// could get out of sync if enum values are reordered or if entries
93// are added or deleted. This dummy function uses static_assert to
94// ensure everything is kept in sync.
95void xMacroIntegrityCheck() {
96 // Validate the enum values in FCMPX8632_TABLE.
97 {
98 // Define a temporary set of enum values based on low-level
99 // table entries.
100 enum _tmp_enum {
101#define X(val, dflt, swap, C1, C2) _tmp_##val,
102 FCMPX8632_TABLE
103#undef X
Jim Stichnoth4376d292014-05-23 13:39:02 -0700104 _num
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700105 };
106// Define a set of constants based on high-level table entries.
107#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
108 ICEINSTFCMP_TABLE;
109#undef X
110// Define a set of constants based on low-level table entries,
111// and ensure the table entry keys are consistent.
112#define X(val, dflt, swap, C1, C2) \
113 static const int _table2_##val = _tmp_##val; \
114 STATIC_ASSERT(_table1_##val == _table2_##val);
115 FCMPX8632_TABLE;
116#undef X
117// Repeat the static asserts with respect to the high-level
118// table entries in case the high-level table has extra entries.
119#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
120 ICEINSTFCMP_TABLE;
121#undef X
122 }
123
124 // Validate the enum values in ICMPX8632_TABLE.
125 {
126 // Define a temporary set of enum values based on low-level
127 // table entries.
128 enum _tmp_enum {
129#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
130 ICMPX8632_TABLE
131#undef X
Jim Stichnoth4376d292014-05-23 13:39:02 -0700132 _num
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700133 };
134// Define a set of constants based on high-level table entries.
135#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
136 ICEINSTICMP_TABLE;
137#undef X
138// Define a set of constants based on low-level table entries,
139// and ensure the table entry keys are consistent.
140#define X(val, C_32, C1_64, C2_64, C3_64) \
141 static const int _table2_##val = _tmp_##val; \
142 STATIC_ASSERT(_table1_##val == _table2_##val);
143 ICMPX8632_TABLE;
144#undef X
145// Repeat the static asserts with respect to the high-level
146// table entries in case the high-level table has extra entries.
147#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
148 ICEINSTICMP_TABLE;
149#undef X
150 }
151
152 // Validate the enum values in ICETYPEX8632_TABLE.
153 {
154 // Define a temporary set of enum values based on low-level
155 // table entries.
156 enum _tmp_enum {
157#define X(tag, cvt, sdss, width) _tmp_##tag,
158 ICETYPEX8632_TABLE
159#undef X
Jim Stichnoth4376d292014-05-23 13:39:02 -0700160 _num
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700161 };
162// Define a set of constants based on high-level table entries.
163#define X(tag, size, align, str) static const int _table1_##tag = tag;
164 ICETYPE_TABLE;
165#undef X
166// Define a set of constants based on low-level table entries,
167// and ensure the table entry keys are consistent.
168#define X(tag, cvt, sdss, width) \
169 static const int _table2_##tag = _tmp_##tag; \
170 STATIC_ASSERT(_table1_##tag == _table2_##tag);
171 ICETYPEX8632_TABLE;
172#undef X
173// Repeat the static asserts with respect to the high-level
174// table entries in case the high-level table has extra entries.
175#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
176 ICETYPE_TABLE;
177#undef X
178 }
179}
180
181} // end of anonymous namespace
182
183TargetX8632::TargetX8632(Cfg *Func)
184 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
185 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
186 PhysicalRegisters(VarList(Reg_NUM)) {
187 // TODO: Don't initialize IntegerRegisters and friends every time.
188 // Instead, initialize in some sort of static initializer for the
189 // class.
190 llvm::SmallBitVector IntegerRegisters(Reg_NUM);
191 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
192 llvm::SmallBitVector FloatRegisters(Reg_NUM);
193 llvm::SmallBitVector InvalidRegisters(Reg_NUM);
194 ScratchRegs.resize(Reg_NUM);
195#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
196 frameptr, isI8, isInt, isFP) \
197 IntegerRegisters[val] = isInt; \
198 IntegerRegistersI8[val] = isI8; \
199 FloatRegisters[val] = isFP; \
200 ScratchRegs[val] = scratch;
201 REGX8632_TABLE;
202#undef X
203 TypeToRegisterSet[IceType_void] = InvalidRegisters;
204 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
205 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
206 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
207 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
208 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
209 TypeToRegisterSet[IceType_f32] = FloatRegisters;
210 TypeToRegisterSet[IceType_f64] = FloatRegisters;
211}
212
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700213void TargetX8632::translateO2() {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700214 GlobalContext *Context = Func->getContext();
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700215
216 // Lower Phi instructions.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700217 Timer T_placePhiLoads;
218 Func->placePhiLoads();
219 if (Func->hasError())
220 return;
221 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
222 Timer T_placePhiStores;
223 Func->placePhiStores();
224 if (Func->hasError())
225 return;
226 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
227 Timer T_deletePhis;
228 Func->deletePhis();
229 if (Func->hasError())
230 return;
231 T_deletePhis.printElapsedUs(Context, "deletePhis()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700232 Func->dump("After Phi lowering");
233
234 // Address mode optimization.
235 Timer T_doAddressOpt;
236 Func->doAddressOpt();
237 T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");
238
239 // Target lowering. This requires liveness analysis for some parts
240 // of the lowering decisions, such as compare/branch fusing. If
241 // non-lightweight liveness analysis is used, the instructions need
242 // to be renumbered first. TODO: This renumbering should only be
243 // necessary if we're actually calculating live intervals, which we
244 // only do for register allocation.
245 Timer T_renumber1;
246 Func->renumberInstructions();
247 if (Func->hasError())
248 return;
249 T_renumber1.printElapsedUs(Context, "renumberInstructions()");
250 // TODO: It should be sufficient to use the fastest liveness
251 // calculation, i.e. livenessLightweight(). However, for some
252 // reason that slows down the rest of the translation. Investigate.
253 Timer T_liveness1;
254 Func->liveness(Liveness_Basic);
255 if (Func->hasError())
256 return;
257 T_liveness1.printElapsedUs(Context, "liveness()");
258 Func->dump("After x86 address mode opt");
259 Timer T_genCode;
260 Func->genCode();
261 if (Func->hasError())
262 return;
263 T_genCode.printElapsedUs(Context, "genCode()");
264
265 // Register allocation. This requires instruction renumbering and
266 // full liveness analysis.
267 Timer T_renumber2;
268 Func->renumberInstructions();
269 if (Func->hasError())
270 return;
271 T_renumber2.printElapsedUs(Context, "renumberInstructions()");
272 Timer T_liveness2;
273 Func->liveness(Liveness_Intervals);
274 if (Func->hasError())
275 return;
276 T_liveness2.printElapsedUs(Context, "liveness()");
277 // Validate the live range computations. Do it outside the timing
278 // code. TODO: Put this under a flag.
279 bool ValidLiveness = Func->validateLiveness();
280 assert(ValidLiveness);
281 (void)ValidLiveness; // used only in assert()
282 ComputedLiveRanges = true;
283 // The post-codegen dump is done here, after liveness analysis and
284 // associated cleanup, to make the dump cleaner and more useful.
285 Func->dump("After initial x8632 codegen");
286 Timer T_regAlloc;
287 regAlloc();
288 if (Func->hasError())
289 return;
290 T_regAlloc.printElapsedUs(Context, "regAlloc()");
291 Func->dump("After linear scan regalloc");
292
293 // Stack frame mapping.
294 Timer T_genFrame;
295 Func->genFrame();
296 if (Func->hasError())
297 return;
298 T_genFrame.printElapsedUs(Context, "genFrame()");
299 Func->dump("After stack frame mapping");
300}
301
302void TargetX8632::translateOm1() {
303 GlobalContext *Context = Func->getContext();
304 Timer T_placePhiLoads;
305 Func->placePhiLoads();
306 if (Func->hasError())
307 return;
308 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
309 Timer T_placePhiStores;
310 Func->placePhiStores();
311 if (Func->hasError())
312 return;
313 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
314 Timer T_deletePhis;
315 Func->deletePhis();
316 if (Func->hasError())
317 return;
318 T_deletePhis.printElapsedUs(Context, "deletePhis()");
319 Func->dump("After Phi lowering");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700320
321 Timer T_genCode;
322 Func->genCode();
323 if (Func->hasError())
324 return;
325 T_genCode.printElapsedUs(Context, "genCode()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700326 Func->dump("After initial x8632 codegen");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700327
328 Timer T_genFrame;
329 Func->genFrame();
330 if (Func->hasError())
331 return;
332 T_genFrame.printElapsedUs(Context, "genFrame()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700333 Func->dump("After stack frame mapping");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700334}
335
336IceString TargetX8632::RegNames[] = {
337#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
338 frameptr, isI8, isInt, isFP) \
339 name,
340 REGX8632_TABLE
341#undef X
342};
343
344Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
345 assert(RegNum < PhysicalRegisters.size());
346 Variable *Reg = PhysicalRegisters[RegNum];
347 if (Reg == NULL) {
348 CfgNode *Node = NULL; // NULL means multi-block lifetime
349 Reg = Func->makeVariable(IceType_i32, Node);
350 Reg->setRegNum(RegNum);
351 PhysicalRegisters[RegNum] = Reg;
352 }
353 return Reg;
354}
355
356IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
357 assert(RegNum < Reg_NUM);
358 static IceString RegNames8[] = {
359#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
360 frameptr, isI8, isInt, isFP) \
Jim Stichnoth4376d292014-05-23 13:39:02 -0700361 name8,
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700362 REGX8632_TABLE
363#undef X
364 };
365 static IceString RegNames16[] = {
366#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
367 frameptr, isI8, isInt, isFP) \
Jim Stichnoth4376d292014-05-23 13:39:02 -0700368 name16,
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700369 REGX8632_TABLE
370#undef X
371 };
372 switch (Ty) {
373 case IceType_i1:
374 case IceType_i8:
375 return RegNames8[RegNum];
376 case IceType_i16:
377 return RegNames16[RegNum];
378 default:
379 return RegNames[RegNum];
380 }
381}
382
383void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
384 Ostream &Str = Ctx->getStrEmit();
385 assert(Var->getLocalUseNode() == NULL ||
386 Var->getLocalUseNode() == Func->getCurrentNode());
387 if (Var->hasReg()) {
388 Str << getRegName(Var->getRegNum(), Var->getType());
389 return;
390 }
391 Str << InstX8632::getWidthString(Var->getType());
392 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
Jim Stichnothb0e142b2014-06-12 15:28:56 -0700393 int32_t Offset = Var->getStackOffset();
394 if (!hasFramePointer())
395 Offset += getStackAdjustment();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700396 if (Offset) {
397 if (Offset > 0)
398 Str << "+";
399 Str << Offset;
400 }
401 Str << "]";
402}
403
404// Helper function for addProlog(). Sets the frame offset for Arg,
405// updates InArgsSizeBytes according to Arg's width, and generates an
406// instruction to copy Arg into its assigned register if applicable.
407// For an I64 arg that has been split into Lo and Hi components, it
408// calls itself recursively on the components, taking care to handle
409// Lo first because of the little-endian architecture.
410void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700411 size_t BasicFrameOffset,
412 size_t &InArgsSizeBytes) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700413 Variable *Lo = Arg->getLo();
414 Variable *Hi = Arg->getHi();
415 Type Ty = Arg->getType();
416 if (Lo && Hi && Ty == IceType_i64) {
417 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
418 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
419 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
420 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
421 return;
422 }
423 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
424 if (Arg->hasReg()) {
425 assert(Ty != IceType_i64);
426 OperandX8632Mem *Mem = OperandX8632Mem::create(
427 Func, Ty, FramePtr,
428 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
429 _mov(Arg, Mem);
430 }
431 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
432}
433
434void TargetX8632::addProlog(CfgNode *Node) {
435 // If SimpleCoalescing is false, each variable without a register
436 // gets its own unique stack slot, which leads to large stack
437 // frames. If SimpleCoalescing is true, then each "global" variable
438 // without a register gets its own slot, but "local" variable slots
439 // are reused across basic blocks. E.g., if A and B are local to
440 // block 1 and C is local to block 2, then C may share a slot with A
441 // or B.
442 const bool SimpleCoalescing = true;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700443 size_t InArgsSizeBytes = 0;
444 size_t RetIpSizeBytes = 4;
445 size_t PreservedRegsSizeBytes = 0;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700446 LocalsSizeBytes = 0;
447 Context.init(Node);
448 Context.setInsertPoint(Context.getCur());
449
450 // Determine stack frame offsets for each Variable without a
451 // register assignment. This can be done as one variable per stack
452 // slot. Or, do coalescing by running the register allocator again
453 // with an infinite set of registers (as a side effect, this gives
454 // variables a second chance at physical register assignment).
455 //
456 // A middle ground approach is to leverage sparsity and allocate one
457 // block of space on the frame for globals (variables with
458 // multi-block lifetime), and one block to share for locals
459 // (single-block lifetime).
460
461 llvm::SmallBitVector CalleeSaves =
462 getRegisterSet(RegSet_CalleeSave, RegSet_None);
463
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700464 size_t GlobalsSize = 0;
465 std::vector<size_t> LocalsSize(Func->getNumNodes());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700466
467 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
468 // LocalsSizeBytes.
469 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
470 const VarList &Variables = Func->getVariables();
471 const VarList &Args = Func->getArgs();
472 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
473 I != E; ++I) {
474 Variable *Var = *I;
475 if (Var->hasReg()) {
476 RegsUsed[Var->getRegNum()] = true;
477 continue;
478 }
479 // An argument passed on the stack already has a stack slot.
480 if (Var->getIsArg())
481 continue;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700482 // An unreferenced variable doesn't need a stack slot.
483 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
484 continue;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700485 // A spill slot linked to a variable with a stack slot should reuse
486 // that stack slot.
487 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
488 if (Variable *Linked = Var->getPreferredRegister()) {
489 if (!Linked->hasReg())
490 continue;
491 }
492 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700493 size_t Increment = typeWidthInBytesOnStack(Var->getType());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700494 if (SimpleCoalescing) {
495 if (Var->isMultiblockLife()) {
496 GlobalsSize += Increment;
497 } else {
498 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
499 LocalsSize[NodeIndex] += Increment;
500 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
501 LocalsSizeBytes = LocalsSize[NodeIndex];
502 }
503 } else {
504 LocalsSizeBytes += Increment;
505 }
506 }
507 LocalsSizeBytes += GlobalsSize;
508
509 // Add push instructions for preserved registers.
510 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
511 if (CalleeSaves[i] && RegsUsed[i]) {
512 PreservedRegsSizeBytes += 4;
513 const bool SuppressStackAdjustment = true;
514 _push(getPhysicalRegister(i), SuppressStackAdjustment);
515 }
516 }
517
518 // Generate "push ebp; mov ebp, esp"
519 if (IsEbpBasedFrame) {
520 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
521 .count() == 0);
522 PreservedRegsSizeBytes += 4;
523 Variable *ebp = getPhysicalRegister(Reg_ebp);
524 Variable *esp = getPhysicalRegister(Reg_esp);
525 const bool SuppressStackAdjustment = true;
526 _push(ebp, SuppressStackAdjustment);
527 _mov(ebp, esp);
528 }
529
530 // Generate "sub esp, LocalsSizeBytes"
531 if (LocalsSizeBytes)
532 _sub(getPhysicalRegister(Reg_esp),
533 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
534
535 resetStackAdjustment();
536
537 // Fill in stack offsets for args, and copy args into registers for
538 // those that were register-allocated. Args are pushed right to
539 // left, so Arg[0] is closest to the stack/frame pointer.
540 //
541 // TODO: Make this right for different width args, calling
542 // conventions, etc. For one thing, args passed in registers will
543 // need to be copied/shuffled to their home registers (the
544 // RegManager code may have some permutation logic to leverage),
545 // and if they have no home register, home space will need to be
546 // allocated on the stack to copy into.
547 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700548 size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700549 if (!IsEbpBasedFrame)
550 BasicFrameOffset += LocalsSizeBytes;
551 for (SizeT i = 0; i < Args.size(); ++i) {
552 Variable *Arg = Args[i];
553 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
554 }
555
556 // Fill in stack offsets for locals.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700557 size_t TotalGlobalsSize = GlobalsSize;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700558 GlobalsSize = 0;
559 LocalsSize.assign(LocalsSize.size(), 0);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700560 size_t NextStackOffset = 0;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700561 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
562 I != E; ++I) {
563 Variable *Var = *I;
564 if (Var->hasReg()) {
565 RegsUsed[Var->getRegNum()] = true;
566 continue;
567 }
568 if (Var->getIsArg())
569 continue;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700570 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
571 continue;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700572 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
573 if (Variable *Linked = Var->getPreferredRegister()) {
574 if (!Linked->hasReg()) {
575 // TODO: Make sure Linked has already been assigned a stack
576 // slot.
577 Var->setStackOffset(Linked->getStackOffset());
578 continue;
579 }
580 }
581 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700582 size_t Increment = typeWidthInBytesOnStack(Var->getType());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700583 if (SimpleCoalescing) {
584 if (Var->isMultiblockLife()) {
585 GlobalsSize += Increment;
586 NextStackOffset = GlobalsSize;
587 } else {
588 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
589 LocalsSize[NodeIndex] += Increment;
590 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
591 }
592 } else {
593 NextStackOffset += Increment;
594 }
595 if (IsEbpBasedFrame)
596 Var->setStackOffset(-NextStackOffset);
597 else
598 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
599 }
600 this->FrameSizeLocals = NextStackOffset;
601 this->HasComputedFrame = true;
602
603 if (Func->getContext()->isVerbose(IceV_Frame)) {
604 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
605 << "\n"
606 << "InArgsSizeBytes=" << InArgsSizeBytes
607 << "\n"
608 << "PreservedRegsSizeBytes="
609 << PreservedRegsSizeBytes << "\n";
610 }
611}
612
613void TargetX8632::addEpilog(CfgNode *Node) {
614 InstList &Insts = Node->getInsts();
615 InstList::reverse_iterator RI, E;
616 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
617 if (llvm::isa<InstX8632Ret>(*RI))
618 break;
619 }
620 if (RI == E)
621 return;
622
623 // Convert the reverse_iterator position into its corresponding
624 // (forward) iterator position.
625 InstList::iterator InsertPoint = RI.base();
626 --InsertPoint;
627 Context.init(Node);
628 Context.setInsertPoint(InsertPoint);
629
630 Variable *esp = getPhysicalRegister(Reg_esp);
631 if (IsEbpBasedFrame) {
632 Variable *ebp = getPhysicalRegister(Reg_ebp);
633 _mov(esp, ebp);
634 _pop(ebp);
635 } else {
636 // add esp, LocalsSizeBytes
637 if (LocalsSizeBytes)
638 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
639 }
640
641 // Add pop instructions for preserved registers.
642 llvm::SmallBitVector CalleeSaves =
643 getRegisterSet(RegSet_CalleeSave, RegSet_None);
644 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
645 SizeT j = CalleeSaves.size() - i - 1;
646 if (j == Reg_ebp && IsEbpBasedFrame)
647 continue;
648 if (CalleeSaves[j] && RegsUsed[j]) {
649 _pop(getPhysicalRegister(j));
650 }
651 }
652}
653
Jim Stichnothf61d5b22014-05-23 13:31:24 -0700654template <typename T> struct PoolTypeConverter {};
655
656template <> struct PoolTypeConverter<float> {
657 typedef float PrimitiveFpType;
658 typedef uint32_t PrimitiveIntType;
659 typedef ConstantFloat IceType;
660 static const Type Ty = IceType_f32;
661 static const char *TypeName;
662 static const char *AsmTag;
663 static const char *PrintfString;
664};
665const char *PoolTypeConverter<float>::TypeName = "float";
666const char *PoolTypeConverter<float>::AsmTag = ".long";
667const char *PoolTypeConverter<float>::PrintfString = "0x%x";
668
669template <> struct PoolTypeConverter<double> {
670 typedef double PrimitiveFpType;
671 typedef uint64_t PrimitiveIntType;
672 typedef ConstantDouble IceType;
673 static const Type Ty = IceType_f64;
674 static const char *TypeName;
675 static const char *AsmTag;
676 static const char *PrintfString;
677};
678const char *PoolTypeConverter<double>::TypeName = "double";
679const char *PoolTypeConverter<double>::AsmTag = ".quad";
680const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
681
682template <typename T> void TargetX8632::emitConstantPool() const {
683 Ostream &Str = Ctx->getStrEmit();
684 Type Ty = T::Ty;
685 SizeT Align = typeAlignInBytes(Ty);
686 ConstantList Pool = Ctx->getConstantPool(Ty);
687
688 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
689 << "\n";
690 Str << "\t.align\t" << Align << "\n";
691 for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
692 ++I) {
693 typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
694 typename T::PrimitiveFpType Value = Const->getValue();
695 // Use memcpy() to copy bits from Value into RawValue in a way
696 // that avoids breaking strict-aliasing rules.
697 typename T::PrimitiveIntType RawValue;
698 memcpy(&RawValue, &Value, sizeof(Value));
699 char buf[30];
700 int CharsPrinted =
701 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
702 assert(CharsPrinted >= 0 &&
703 (size_t)CharsPrinted < llvm::array_lengthof(buf));
704 (void)CharsPrinted; // avoid warnings if asserts are disabled
705 Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
706 Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
707 << Value << "\n";
708 }
709}
710
711void TargetX8632::emitConstants() const {
712 emitConstantPool<PoolTypeConverter<float> >();
713 emitConstantPool<PoolTypeConverter<double> >();
714
715 // No need to emit constants from the int pool since (for x86) they
716 // are embedded as immediates in the instructions.
717}
718
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700719void TargetX8632::split64(Variable *Var) {
720 switch (Var->getType()) {
721 default:
722 return;
723 case IceType_i64:
724 // TODO: Only consider F64 if we need to push each half when
725 // passing as an argument to a function call. Note that each half
726 // is still typed as I32.
727 case IceType_f64:
728 break;
729 }
730 Variable *Lo = Var->getLo();
731 Variable *Hi = Var->getHi();
732 if (Lo) {
733 assert(Hi);
734 return;
735 }
736 assert(Hi == NULL);
737 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
738 Var->getName() + "__lo");
739 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
740 Var->getName() + "__hi");
741 Var->setLoHi(Lo, Hi);
742 if (Var->getIsArg()) {
743 Lo->setIsArg(Func);
744 Hi->setIsArg(Func);
745 }
746}
747
748Operand *TargetX8632::loOperand(Operand *Operand) {
749 assert(Operand->getType() == IceType_i64);
750 if (Operand->getType() != IceType_i64)
751 return Operand;
752 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
753 split64(Var);
754 return Var->getLo();
755 }
756 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
757 uint64_t Mask = (1ull << 32) - 1;
758 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
759 }
760 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
761 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
762 Mem->getOffset(), Mem->getIndex(),
763 Mem->getShift());
764 }
765 llvm_unreachable("Unsupported operand type");
766 return NULL;
767}
768
769Operand *TargetX8632::hiOperand(Operand *Operand) {
770 assert(Operand->getType() == IceType_i64);
771 if (Operand->getType() != IceType_i64)
772 return Operand;
773 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
774 split64(Var);
775 return Var->getHi();
776 }
777 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
778 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
779 }
780 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
781 Constant *Offset = Mem->getOffset();
782 if (Offset == NULL)
783 Offset = Ctx->getConstantInt(IceType_i32, 4);
784 else if (ConstantInteger *IntOffset =
785 llvm::dyn_cast<ConstantInteger>(Offset)) {
786 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
787 } else if (ConstantRelocatable *SymOffset =
788 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
789 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
790 SymOffset->getName());
791 }
792 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
793 Mem->getIndex(), Mem->getShift());
794 }
795 llvm_unreachable("Unsupported operand type");
796 return NULL;
797}
798
799llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
800 RegSetMask Exclude) const {
801 llvm::SmallBitVector Registers(Reg_NUM);
802
803#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
804 frameptr, isI8, isInt, isFP) \
805 if (scratch && (Include & RegSet_CallerSave)) \
806 Registers[val] = true; \
807 if (preserved && (Include & RegSet_CalleeSave)) \
808 Registers[val] = true; \
809 if (stackptr && (Include & RegSet_StackPointer)) \
810 Registers[val] = true; \
811 if (frameptr && (Include & RegSet_FramePointer)) \
812 Registers[val] = true; \
813 if (scratch && (Exclude & RegSet_CallerSave)) \
814 Registers[val] = false; \
815 if (preserved && (Exclude & RegSet_CalleeSave)) \
816 Registers[val] = false; \
817 if (stackptr && (Exclude & RegSet_StackPointer)) \
818 Registers[val] = false; \
819 if (frameptr && (Exclude & RegSet_FramePointer)) \
820 Registers[val] = false;
821
822 REGX8632_TABLE
823
824#undef X
825
826 return Registers;
827}
828
829void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
830 IsEbpBasedFrame = true;
831 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
832 // the number of adjustments of esp, etc.
833 Variable *esp = getPhysicalRegister(Reg_esp);
834 Operand *TotalSize = legalize(Inst->getSizeInBytes());
835 Variable *Dest = Inst->getDest();
836 _sub(esp, TotalSize);
837 _mov(Dest, esp);
838}
839
// Lowers an arithmetic instruction. 64-bit integer operations are
// decomposed into sequences on the lo/hi 32-bit halves (with runtime
// helper calls for 64-bit divide/remainder). 32-bit and narrower integer
// operations map onto the corresponding x86 instructions, scalar FP
// operations map onto SSE instructions, and frem becomes a call to
// fmod/fmodf.
void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = NULL, *T_Hi = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      // 64-bit add: add the lo halves, then add-with-carry the hi halves.
      _mov(T_Lo, Src0Lo);
      _add(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _adc(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      // Bitwise ops operate on the halves independently.
      _mov(T_Lo, Src0Lo);
      _and(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _and(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _mov(T_Lo, Src0Lo);
      _or(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _or(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _mov(T_Lo, Src0Lo);
      _xor(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _xor(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      // 64-bit sub: subtract the lo halves, then subtract-with-borrow the
      // hi halves.
      _mov(T_Lo, Src0Lo);
      _sub(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _sbb(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
      Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
      // gcc does the following:
      // a=b*c ==>
      //   t1 = b.hi; t1 *=(imul) c.lo
      //   t2 = c.hi; t2 *=(imul) b.lo
      //   t3:eax = b.lo
      //   t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
      //   a.lo = t4.lo
      //   t4.hi += t1
      //   t4.hi += t2
      //   a.hi = t4.hi
      _mov(T_1, Src0Hi);
      _imul(T_1, Src1Lo);
      _mov(T_2, Src1Hi);
      _imul(T_2, Src0Lo);
      _mov(T_3, Src0Lo, Reg_eax);
      _mul(T_4Lo, T_3, Src1Lo);
      // The mul instruction produces two dest variables, edx:eax.  We
      // create a fake definition of edx to account for this.
      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
      _mov(DestLo, T_4Lo);
      _add(T_4Hi, T_1);
      _add(T_4Hi, T_2);
      _mov(DestHi, T_4Hi);
    } break;
    case InstArithmetic::Shl: {
      // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
      // gcc does the following:
      // a=b<<c ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_3));
      _mov(T_3, T_2);
      _mov(T_2, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _mov(T_3, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _sar(T_3, SignExtend);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Udiv: {
      // 64-bit divide/remainder are lowered as compiler-rt helper calls.
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else { // Dest->getType() != IceType_i64
    Variable *T_edx = NULL;
    Variable *T = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      // The common two-address pattern: copy Src0 into a temporary,
      // operate on the temporary, then copy to Dest.
      _mov(T, Src0);
      _add(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::And:
      _mov(T, Src0);
      _and(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Or:
      _mov(T, Src0);
      _or(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Xor:
      _mov(T, Src0);
      _xor(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Sub:
      _mov(T, Src0);
      _sub(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Mul:
      // TODO: Optimize for llvm::isa<Constant>(Src1)
      // TODO: Strength-reduce multiplications by a constant,
      // particularly -1 and powers of 2.  Advanced: use lea to
      // multiply by 3, 5, 9.
      //
      // The 8-bit version of imul only allows the form "imul r/m8"
      // where T must be in eax.
      if (Dest->getType() == IceType_i8)
        _mov(T, Src0, Reg_eax);
      else
        _mov(T, Src0);
      _imul(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Shl:
      // A variable shift amount must live in cl.
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shl(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shr(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _sar(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      // div and idiv are the few arithmetic operators that do not allow
      // immediates as the operand.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        // 8-bit div takes its dividend in ax (al plus zeroed ah).
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T, Src1, T_ah);
        _mov(Dest, T);
      } else {
        // 32-bit div takes its dividend in edx:eax; zero edx for unsigned.
        Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_edx, Zero, Reg_edx);
        _div(T, Src1, T_edx);
        _mov(Dest, T);
      }
      break;
    case InstArithmetic::Sdiv:
      // Like Udiv, the operand must be in a register or memory.  Signed
      // divide sign-extends eax into edx via cdq.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
      break;
    case InstArithmetic::Urem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        // The 8-bit remainder is left in ah.
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T_ah, Src1, T);
        _mov(Dest, T_ah);
      } else {
        // The 32-bit remainder is left in edx.
        Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
        _mov(T_edx, Zero, Reg_edx);
        _mov(T, Src0, Reg_eax);
        _div(T_edx, Src1, T);
        _mov(Dest, T_edx);
      }
      break;
    case InstArithmetic::Srem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    case InstArithmetic::Fadd:
      _mov(T, Src0);
      _addss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fsub:
      _mov(T, Src0);
      _subss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fmul:
      _mov(T, Src0);
      _mulss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fdiv:
      _mov(T, Src0);
      _divss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Frem: {
      // x86 has no SSE remainder instruction; call fmod/fmodf.
      const SizeT MaxSrcs = 2;
      Type Ty = Dest->getType();
      InstCall *Call =
          makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      return lowerCall(Call);
    } break;
    }
  }
}
1222
1223void TargetX8632::lowerAssign(const InstAssign *Inst) {
1224 Variable *Dest = Inst->getDest();
1225 Operand *Src0 = Inst->getSrc(0);
1226 assert(Dest->getType() == Src0->getType());
1227 if (Dest->getType() == IceType_i64) {
1228 Src0 = legalize(Src0);
1229 Operand *Src0Lo = loOperand(Src0);
1230 Operand *Src0Hi = hiOperand(Src0);
1231 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1232 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1233 Variable *T_Lo = NULL, *T_Hi = NULL;
1234 _mov(T_Lo, Src0Lo);
1235 _mov(DestLo, T_Lo);
1236 _mov(T_Hi, Src0Hi);
1237 _mov(DestHi, T_Hi);
1238 } else {
1239 const bool AllowOverlap = true;
1240 // RI is either a physical register or an immediate.
1241 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
1242 _mov(Dest, RI);
1243 }
1244}
1245
1246void TargetX8632::lowerBr(const InstBr *Inst) {
1247 if (Inst->isUnconditional()) {
1248 _br(Inst->getTargetUnconditional());
1249 } else {
1250 Operand *Src0 = legalize(Inst->getCondition());
1251 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1252 _cmp(Src0, Zero);
1253 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1254 }
1255}
1256
// Lowers a call instruction: pushes the arguments right-to-left, emits the
// call, restores esp, models the killed scratch registers, and copies the
// return value out of eax/edx (integers) or st(0) (floating point).
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    // Arguments are visited last-to-first so they are pushed right-to-left.
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // 64-bit integers are pushed as two 32-bit halves, hi first.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions.  This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround.  Force the operand into a (xmm) register, and
      // then push the register.  An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      // Otherwise PNaCl requires parameter types to be at least 32-bits.
      assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction.  Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    // Pick the register(s) that hold the return value, based on type.
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      // 64-bit results come back in the edx:eax pair.
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  if (edx)
    // The call only formally defines eax; fake-define edx for i64 results.
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp.  The caller pops its own arguments.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction.  ScratchRegs is indexed by
  // physical register number.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment.
  if (Dest && eax) {
    if (edx) {
      // 64-bit result: split Dest and copy the edx:eax pair into it,
      // hinting the register allocator toward eax/edx.
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1370
// Lowers a cast instruction.  Integer extensions/truncations use
// movsx/movzx/mov; FP conversions use cvt; 64-bit <-> FP conversions go
// through runtime helper calls; bitcasts between integer and FP types are
// routed through a stack spill slot.
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand.  If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // An i32 source needs no extension for the lo half.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      // The hi half is the lo half arithmetically shifted by 31, i.e. a
      // replicated sign bit.
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask to a single bit since an i1 may not be stored canonically.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncation just takes the lo half (for i64 sources) or relies on the
    // narrower mov.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64 is called even when Dest is i32 — confirm
      // split64 is a no-op (or harmless) for non-i64 variables.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    if (Dest->getType() == Src0RM->getType()) {
      // Same-type bitcast degenerates to a plain assignment.
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      // VariableSplit gives 32-bit views of the 64-bit spill slot.
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // Fake-define the spill slot so the stores into its halves aren't
      // treated as uses of an undefined variable.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1645
1646void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1647 Operand *Src0 = Inst->getSrc(0);
1648 Operand *Src1 = Inst->getSrc(1);
1649 Variable *Dest = Inst->getDest();
1650 // Lowering a = fcmp cond, b, c
1651 // ucomiss b, c /* only if C1 != Br_None */
1652 // /* but swap b,c order if SwapOperands==true */
1653 // mov a, <default>
1654 // j<C1> label /* only if C1 != Br_None */
1655 // j<C2> label /* only if C2 != Br_None */
1656 // FakeUse(a) /* only if C1 != Br_None */
1657 // mov a, !<default> /* only if C1 != Br_None */
1658 // label: /* only if C1 != Br_None */
1659 InstFcmp::FCond Condition = Inst->getCondition();
1660 size_t Index = static_cast<size_t>(Condition);
1661 assert(Index < TableFcmpSize);
1662 if (TableFcmp[Index].SwapOperands) {
1663 Operand *Tmp = Src0;
1664 Src0 = Src1;
1665 Src1 = Tmp;
1666 }
1667 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
1668 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
1669 if (HasC1) {
1670 Src0 = legalize(Src0);
1671 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
1672 Variable *T = NULL;
1673 _mov(T, Src0);
1674 _ucomiss(T, Src1RM);
1675 }
1676 Constant *Default =
1677 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
1678 _mov(Dest, Default);
1679 if (HasC1) {
1680 InstX8632Label *Label = InstX8632Label::create(Func, this);
1681 _br(TableFcmp[Index].C1, Label);
1682 if (HasC2) {
1683 _br(TableFcmp[Index].C2, Label);
1684 }
1685 Context.insert(InstFakeUse::create(Func, Dest));
1686 Constant *NonDefault =
1687 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
1688 _mov(Dest, NonDefault);
1689 Context.insert(Label);
1690 }
1691}
1692
// Lowers an integer compare.  When the compare is immediately followed by
// a conditional branch on its result, the two are fused into a single
// cmp+jcc.  64-bit compares are lowered as paired hi/lo compares with
// explicit labels; everything else uses a cmp/branch/FakeUse idiom to
// materialize the 0/1 result.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch.  This
  // is possible when the compare dest and the branch source operands are the
  // same, and are their only uses.  TODO: implement this optimization for i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      Operand *Src0New =
          legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
      _cmp(Src0New, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      NextBr->setDeleted();
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // eq/ne: both halves must match (eq) or any half may differ (ne).
      // Start with the "halves differ" answer and flip it if neither
      // compare branches away.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // Intra-block flow: fake a use of Dest so the first mov isn't
      // dead-code eliminated.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // Ordered compares: decide on the hi halves (C1/C2), falling back to
      // the lo halves (C3) when the hi halves are equal.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1776
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001777namespace {
1778
1779bool isAdd(const Inst *Inst) {
1780 if (const InstArithmetic *Arith =
1781 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
1782 return (Arith->getOp() == InstArithmetic::Add);
1783 }
1784 return false;
1785}
1786
1787void computeAddressOpt(Variable *&Base, Variable *&Index, int32_t &Shift,
1788 int32_t &Offset) {
1789 (void)Offset; // TODO: pattern-match for non-zero offsets.
1790 if (Base == NULL)
1791 return;
1792 // If the Base has more than one use or is live across multiple
1793 // blocks, then don't go further. Alternatively (?), never consider
1794 // a transformation that would change a variable that is currently
1795 // *not* live across basic block boundaries into one that *is*.
1796 if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
1797 return;
1798
1799 while (true) {
1800 // Base is Base=Var ==>
1801 // set Base=Var
1802 const Inst *BaseInst = Base->getDefinition();
1803 Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL;
1804 Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
1805 // TODO: Helper function for all instances of assignment
1806 // transitivity.
1807 if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
1808 // TODO: ensure BaseVariable0 stays single-BB
1809 true) {
1810 Base = BaseVariable0;
1811 continue;
1812 }
1813
1814 // Index is Index=Var ==>
1815 // set Index=Var
1816
1817 // Index==NULL && Base is Base=Var1+Var2 ==>
1818 // set Base=Var1, Index=Var2, Shift=0
1819 Operand *BaseOperand1 =
1820 BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
1821 Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
1822 if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
1823 // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
1824 true) {
1825 Base = BaseVariable0;
1826 Index = BaseVariable1;
1827 Shift = 0; // should already have been 0
1828 continue;
1829 }
1830
1831 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
1832 // Index=Var, Shift+=log2(Const)
1833 const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
1834 if (const InstArithmetic *ArithInst =
1835 llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
1836 Operand *IndexOperand0 = ArithInst->getSrc(0);
1837 Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
1838 Operand *IndexOperand1 = ArithInst->getSrc(1);
1839 ConstantInteger *IndexConstant1 =
1840 llvm::dyn_cast<ConstantInteger>(IndexOperand1);
1841 if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
1842 IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
1843 uint64_t Mult = IndexConstant1->getValue();
1844 uint32_t LogMult;
1845 switch (Mult) {
1846 case 1:
1847 LogMult = 0;
1848 break;
1849 case 2:
1850 LogMult = 1;
1851 break;
1852 case 4:
1853 LogMult = 2;
1854 break;
1855 case 8:
1856 LogMult = 3;
1857 break;
1858 default:
1859 LogMult = 4;
1860 break;
1861 }
1862 if (Shift + LogMult <= 3) {
1863 Index = IndexVariable0;
1864 Shift += LogMult;
1865 continue;
1866 }
1867 }
1868 }
1869
1870 // Index is Index=Var<<Const && Const+Shift<=3 ==>
1871 // Index=Var, Shift+=Const
1872
1873 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
1874 // Index=Var, Shift+=log2(Const)
1875
1876 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
1877 // swap(Index,Base)
1878 // Similar for Base=Const*Var and Base=Var<<Const
1879
1880 // Base is Base=Var+Const ==>
1881 // set Base=Var, Offset+=Const
1882
1883 // Base is Base=Const+Var ==>
1884 // set Base=Var, Offset+=Const
1885
1886 // Base is Base=Var-Const ==>
1887 // set Base=Var, Offset-=Const
1888
1889 // Index is Index=Var+Const ==>
1890 // set Index=Var, Offset+=(Const<<Shift)
1891
1892 // Index is Index=Const+Var ==>
1893 // set Index=Var, Offset+=(Const<<Shift)
1894
1895 // Index is Index=Var-Const ==>
1896 // set Index=Var, Offset-=(Const<<Shift)
1897
1898 // TODO: consider overflow issues with respect to Offset.
1899 // TODO: handle symbolic constants.
1900 break;
1901 }
1902}
1903
1904} // anonymous namespace
1905
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001906void TargetX8632::lowerLoad(const InstLoad *Inst) {
1907 // A Load instruction can be treated the same as an Assign
1908 // instruction, after the source operand is transformed into an
1909 // OperandX8632Mem operand. Note that the address mode
1910 // optimization already creates an OperandX8632Mem operand, so it
1911 // doesn't need another level of transformation.
1912 Type Ty = Inst->getDest()->getType();
1913 Operand *Src0 = Inst->getSourceAddress();
1914 // Address mode optimization already creates an OperandX8632Mem
1915 // operand, so it doesn't need another level of transformation.
1916 if (!llvm::isa<OperandX8632Mem>(Src0)) {
1917 Variable *Base = llvm::dyn_cast<Variable>(Src0);
1918 Constant *Offset = llvm::dyn_cast<Constant>(Src0);
1919 assert(Base || Offset);
1920 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
1921 }
1922
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001923 // Fuse this load with a subsequent Arithmetic instruction in the
1924 // following situations:
1925 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
1926 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
1927 //
1928 // TODO: Clean up and test thoroughly.
1929 //
1930 // TODO: Why limit to Arithmetic instructions? This could probably be
1931 // applied to most any instruction type. Look at all source operands
1932 // in the following instruction, and if there is one instance of the
1933 // load instruction's dest variable, and that instruction ends that
1934 // variable's live range, then make the substitution. Deal with
1935 // commutativity optimization in the arithmetic instruction lowering.
1936 InstArithmetic *NewArith = NULL;
1937 if (InstArithmetic *Arith =
1938 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
1939 Variable *DestLoad = Inst->getDest();
1940 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
1941 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
1942 if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
1943 DestLoad != Src0Arith) {
1944 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
1945 Arith->getSrc(0), Src0);
1946 } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
1947 Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
1948 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
1949 Arith->getSrc(1), Src0);
1950 }
1951 if (NewArith) {
1952 Arith->setDeleted();
1953 Context.advanceNext();
1954 lowerArithmetic(NewArith);
1955 return;
1956 }
1957 }
1958
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001959 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
1960 lowerAssign(Assign);
1961}
1962
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001963void TargetX8632::doAddressOptLoad() {
1964 Inst *Inst = *Context.getCur();
1965 Variable *Dest = Inst->getDest();
1966 Operand *Addr = Inst->getSrc(0);
1967 Variable *Index = NULL;
1968 int32_t Shift = 0;
1969 int32_t Offset = 0; // TODO: make Constant
1970 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1971 computeAddressOpt(Base, Index, Shift, Offset);
1972 if (Base && Addr != Base) {
1973 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
1974 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
1975 Shift);
1976 Inst->setDeleted();
1977 Context.insert(InstLoad::create(Func, Dest, Addr));
1978 }
1979}
1980
// Phi instructions are expected to be eliminated before target lowering
// runs; encountering one here is flagged as a fatal error on the Cfg.
void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}
1984
1985void TargetX8632::lowerRet(const InstRet *Inst) {
1986 Variable *Reg = NULL;
1987 if (Inst->hasRetValue()) {
1988 Operand *Src0 = legalize(Inst->getRetValue());
1989 if (Src0->getType() == IceType_i64) {
1990 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
1991 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
1992 Reg = eax;
1993 Context.insert(InstFakeUse::create(Func, edx));
1994 } else if (Src0->getType() == IceType_f32 ||
1995 Src0->getType() == IceType_f64) {
1996 _fld(Src0);
1997 } else {
1998 _mov(Reg, Src0, Reg_eax);
1999 }
2000 }
2001 _ret(Reg);
2002 // Add a fake use of esp to make sure esp stays alive for the entire
2003 // function. Otherwise post-call esp adjustments get dead-code
2004 // eliminated. TODO: Are there more places where the fake use
2005 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
2006 // have a ret instruction.
2007 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
2008 Context.insert(InstFakeUse::create(Func, esp));
2009}
2010
// Lowers a select by assigning the true value unconditionally, branching
// over a reassignment of the false value when the condition is non-zero:
//   a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
// The FakeUse prevents the first assignment from being treated as dead.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // 64-bit select: perform the same pattern independently on the lo
    // and hi halves, sharing one cmp/branch/label.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    // Legalize the true-value halves to register-or-immediate before
    // the cmp so no legalization code lands between cmp and jne.
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // Keep the true-branch assignments alive across the branch.
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    // FakeUse keeps the first mov from looking dead.
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
2049
2050void TargetX8632::lowerStore(const InstStore *Inst) {
2051 Operand *Value = Inst->getData();
2052 Operand *Addr = Inst->getAddr();
2053 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
2054 // Address mode optimization already creates an OperandX8632Mem
2055 // operand, so it doesn't need another level of transformation.
2056 if (!NewAddr) {
2057 // The address will be either a constant (which represents a global
2058 // variable) or a variable, so either the Base or Offset component
2059 // of the OperandX8632Mem will be set.
2060 Variable *Base = llvm::dyn_cast<Variable>(Addr);
2061 Constant *Offset = llvm::dyn_cast<Constant>(Addr);
2062 assert(Base || Offset);
2063 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
2064 }
2065 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
2066
2067 if (NewAddr->getType() == IceType_i64) {
2068 Value = legalize(Value);
2069 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
2070 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
2071 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
2072 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
2073 } else {
2074 Value = legalize(Value, Legal_Reg | Legal_Imm, true);
2075 _store(Value, NewAddr);
2076 }
2077}
2078
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002079void TargetX8632::doAddressOptStore() {
2080 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
2081 Operand *Data = Inst->getData();
2082 Operand *Addr = Inst->getAddr();
2083 Variable *Index = NULL;
2084 int32_t Shift = 0;
2085 int32_t Offset = 0; // TODO: make Constant
2086 Variable *Base = llvm::dyn_cast<Variable>(Addr);
2087 computeAddressOpt(Base, Index, Shift, Offset);
2088 if (Base && Addr != Base) {
2089 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
2090 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
2091 Shift);
2092 Inst->setDeleted();
2093 Context.insert(InstStore::create(Func, Data, Addr));
2094 }
2095}
2096
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002097void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
2098 // This implements the most naive possible lowering.
2099 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
2100 Operand *Src0 = Inst->getComparison();
2101 SizeT NumCases = Inst->getNumCases();
2102 // OK, we'll be slightly less naive by forcing Src into a physical
2103 // register if there are 2 or more uses.
2104 if (NumCases >= 2)
2105 Src0 = legalizeToVar(Src0, true);
2106 else
2107 Src0 = legalize(Src0, Legal_All, true);
2108 for (SizeT I = 0; I < NumCases; ++I) {
2109 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
2110 _cmp(Src0, Value);
2111 _br(InstX8632Br::Br_e, Inst->getLabel(I));
2112 }
2113
2114 _br(Inst->getLabelDefault());
2115}
2116
2117void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
2118 const SizeT MaxSrcs = 0;
2119 Variable *Dest = NULL;
2120 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
2121 lowerCall(Call);
2122}
2123
// Copies or transforms From into an operand whose kind is permitted by
// the Allowed mask, emitting mov instructions as needed.  If RegNum is
// specified, the result is forced into that physical register.
// AllowOverlap controls whether a new register may share a preferred
// register with the source variable.  Returns the legalized operand.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register.  If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds.  (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    // Rebuild the mem operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    // If a memory operand isn't allowed here, load it into a register.
    if (!(Allowed & Legal_Mem)) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero.  Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      //
      // If in the future the implementation is changed to lower undef
      // values to uninitialized registers, a FakeDef will be needed:
      //     Context.insert(InstFakeDef::create(Func, Reg));
      // This is in order to ensure that the live range of Reg is not
      // overestimated.  If the constant being lowered is a 64 bit value,
      // then the result should be split and the lo and hi components will
      // need to go in uninitialized registers.
      From = Ctx->getConstantZero(From->getType());
    }
    // If an immediate isn't allowed here, move it into a register.
    if (!(Allowed & Legal_Imm)) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        // No specific register requested: hint the allocator toward
        // the source variable's register.
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  // Mem, Constant, and Variable are the only operand kinds expected.
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
2202
2203// Provide a trivial wrapper to legalize() for this common usage.
2204Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
2205 int32_t RegNum) {
2206 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
2207}
2208
2209Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
2210 Variable *Reg = Func->makeVariable(Type, Context.getNode());
2211 if (RegNum == Variable::NoRegister)
2212 Reg->setWeightInfinite();
2213 else
2214 Reg->setRegNum(RegNum);
2215 return Reg;
2216}
2217
// Performs the simplest possible register allocation for the -Om1
// ("fast") configuration: one pass black-lists registers already
// pre-colored in the lowered instructions, a second pass assigns the
// remaining registers to infinite-weight variables.  No-op at other
// optimization levels.
void TargetX8632::postLower() {
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers.  TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        // This register is already claimed by a pre-colored variable.
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        Variable *Var = Src->getVar(J);
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        // Restrict candidates to registers valid for this type.
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        // Each register is handed out at most once per refill.
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
2279
// Emits a 32-bit float constant as a reference into the per-type
// constant pool, e.g. "dword ptr [L$f32$<id>]".
template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  // It would be better to prefix with ".L$" instead of "L$", but
  // llvm-mc doesn't parse "dword ptr [.L$foo]".
  Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
}
2286
// Emits a 64-bit double constant as a reference into the per-type
// constant pool, e.g. "qword ptr [L$f64$<id>]".  The "L$" (rather than
// ".L$") prefix matches ConstantFloat::emit above.
template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
}
2291
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002292} // end of namespace Ice