//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction. It also implements
// TargetX8632Fast::postLower() which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//
17
18#include "IceDefs.h"
19#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceInstX8632.h"
22#include "IceOperand.h"
23#include "IceTargetLoweringX8632.def"
24#include "IceTargetLoweringX8632.h"
25
26namespace Ice {
27
28namespace {
29
30// The following table summarizes the logic for lowering the fcmp instruction.
31// There is one table entry for each of the 16 conditions. A comment in
32// lowerFcmp() describes the lowering template. In the most general case, there
33// is a compare followed by two conditional branches, because some fcmp
34// conditions don't map to a single x86 conditional branch. However, in many
35// cases it is possible to swap the operands in the comparison and have a single
36// conditional branch. Since it's quite tedious to validate the table by hand,
37// good execution tests are helpful.
38
// One entry per fcmp condition (see the comment block above): the default
// result, whether to swap the compare operands, and the one or two x86
// branch conditions used by the lowering template described in lowerFcmp().
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapOperands;
  InstX8632Br::BrCond C1, C2;
} TableFcmp[] = {
#define X(val, dflt, swap, C1, C2)                                             \
  { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 }                             \
  ,
    FCMPX8632_TABLE
#undef X
  };
// Number of entries in TableFcmp, used for bounds assertions.
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51
52// The following table summarizes the logic for lowering the icmp instruction
53// for i32 and narrower types. Each icmp condition has a clear mapping to an
54// x86 conditional branch instruction.
55
// One entry per icmp condition: the single x86 branch condition used for
// i32-and-narrower compares (see the comment block above).
const struct TableIcmp32_ {
  InstX8632Br::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C_32 }                                                        \
  ,
    ICMPX8632_TABLE
#undef X
  };
// Number of entries in TableIcmp32, used for bounds assertions.
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67// The following table summarizes the logic for lowering the icmp instruction
68// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69// conditional branches are needed. For the other conditions, three separate
70// conditional branches are needed.
// One entry per icmp condition: up to three x86 branch conditions used
// when lowering a 64-bit compare (see the comment block above).
const struct TableIcmp64_ {
  InstX8632Br::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 }               \
  ,
    ICMPX8632_TABLE
#undef X
  };
// Number of entries in TableIcmp64, used for bounds assertions.
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86}
87
// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables
// could get out of sync if enum values are reordered or if entries
// are added or deleted. This dummy function uses static_assert to
// ensure everything is kept in sync.
void xMacroIntegrityCheck() {
  // Validate the enum values in FCMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, dflt, swap, C1, C2) _tmp_##val,
      FCMPX8632_TABLE
#undef X
      // Trailing sentinel enumerator; not compared against anything.
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
    ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, dflt, swap, C1, C2)                                             \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTFCMP_TABLE;
#undef X
  }

  // Validate the enum values in ICMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
      ICMPX8632_TABLE
#undef X
      // Trailing sentinel enumerator; not compared against anything.
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
    ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTICMP_TABLE;
#undef X
  }

  // Validate the enum values in ICETYPEX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(tag, cvt, sdss, width) _tmp_##tag,
      ICETYPEX8632_TABLE
#undef X
      // Trailing sentinel enumerator; not compared against anything.
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, str) static const int _table1_##tag = tag;
    ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(tag, cvt, sdss, width)                                               \
  static const int _table2_##tag = _tmp_##tag;                                 \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPE_TABLE;
#undef X
  }
}
180
181} // end of anonymous namespace
182
// Constructs the x86-32 lowering object for Func and builds the per-type
// register availability sets (TypeToRegisterSet) and scratch-register set
// from the isInt/isI8/isFP/scratch columns of REGX8632_TABLE.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(Reg_NUM)) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Reg_NUM);
  ScratchRegs.resize(Reg_NUM);
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[val] = isInt;                                               \
  IntegerRegistersI8[val] = isI8;                                              \
  FloatRegisters[val] = isFP;                                                  \
  ScratchRegs[val] = scratch;
  REGX8632_TABLE;
#undef X
  // i1 and i8 are restricted to the registers flagged isI8 in the table.
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
}
212
// Runs the -O2 translation pipeline on Func: Phi lowering, address mode
// optimization, target lowering (informed by liveness), linear-scan
// register allocation, and stack frame mapping. Each pass is timed, and
// translation stops early if any pass records an error on Func.
void TargetX8632::translateO2() {
  GlobalContext *Context = Func->getContext();

  // Lower Phi instructions.
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  Func->dump("After Phi lowering");

  // Address mode optimization.
  Timer T_doAddressOpt;
  Func->doAddressOpt();
  T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Timer T_renumber1;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber1.printElapsedUs(Context, "renumberInstructions()");
  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Timer T_liveness1;
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  T_liveness1.printElapsedUs(Context, "liveness()");
  Func->dump("After x86 address mode opt");
  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Timer T_renumber2;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber2.printElapsedUs(Context, "renumberInstructions()");
  Timer T_liveness2;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  T_liveness2.printElapsedUs(Context, "liveness()");
  // Validate the live range computations. Do it outside the timing
  // code. TODO: Put this under a flag.
  bool ValidLiveness = Func->validateLiveness();
  assert(ValidLiveness);
  (void)ValidLiveness; // used only in assert()
  ComputedLiveRanges = true;
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Timer T_regAlloc;
  regAlloc();
  if (Func->hasError())
    return;
  T_regAlloc.printElapsedUs(Context, "regAlloc()");
  Func->dump("After linear scan regalloc");

  // Stack frame mapping.
  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  Func->dump("After stack frame mapping");
}
301
302void TargetX8632::translateOm1() {
303 GlobalContext *Context = Func->getContext();
304 Timer T_placePhiLoads;
305 Func->placePhiLoads();
306 if (Func->hasError())
307 return;
308 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
309 Timer T_placePhiStores;
310 Func->placePhiStores();
311 if (Func->hasError())
312 return;
313 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
314 Timer T_deletePhis;
315 Func->deletePhis();
316 if (Func->hasError())
317 return;
318 T_deletePhis.printElapsedUs(Context, "deletePhis()");
319 Func->dump("After Phi lowering");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700320
321 Timer T_genCode;
322 Func->genCode();
323 if (Func->hasError())
324 return;
325 T_genCode.printElapsedUs(Context, "genCode()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700326 Func->dump("After initial x8632 codegen");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700327
328 Timer T_genFrame;
329 Func->genFrame();
330 if (Func->hasError())
331 return;
332 T_genFrame.printElapsedUs(Context, "genFrame()");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700333 Func->dump("After stack frame mapping");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700334}
335
// Default (32-bit) register names, indexed by register number. The 8-bit
// and 16-bit name tables are local to getRegName().
IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};
343
344Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
345 assert(RegNum < PhysicalRegisters.size());
346 Variable *Reg = PhysicalRegisters[RegNum];
347 if (Reg == NULL) {
348 CfgNode *Node = NULL; // NULL means multi-block lifetime
349 Reg = Func->makeVariable(IceType_i32, Node);
350 Reg->setRegNum(RegNum);
351 PhysicalRegisters[RegNum] = Reg;
352 }
353 return Reg;
354}
355
// Returns the assembly name of register RegNum for type Ty, choosing the
// 8-bit name for i1/i8, the 16-bit name for i16, and the default (32-bit)
// name otherwise.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < Reg_NUM);
  static IceString RegNames8[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
    REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
    REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
382
383void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
384 Ostream &Str = Ctx->getStrEmit();
385 assert(Var->getLocalUseNode() == NULL ||
386 Var->getLocalUseNode() == Func->getCurrentNode());
387 if (Var->hasReg()) {
388 Str << getRegName(Var->getRegNum(), Var->getType());
389 return;
390 }
391 Str << InstX8632::getWidthString(Var->getType());
392 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
Jim Stichnothb0e142b2014-06-12 15:28:56 -0700393 int32_t Offset = Var->getStackOffset();
394 if (!hasFramePointer())
395 Offset += getStackAdjustment();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700396 if (Offset) {
397 if (Offset > 0)
398 Str << "+";
399 Str << Offset;
400 }
401 Str << "]";
402}
403
// Helper function for addProlog(). Sets the frame offset for Arg,
// updates InArgsSizeBytes according to Arg's width, and generates an
// instruction to copy Arg into its assigned register if applicable.
// For an I64 arg that has been split into Lo and Hi components, it
// calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture.
// BasicFrameOffset is the fixed distance from the frame/stack pointer to
// the first in-arg; InArgsSizeBytes accumulates the stack bytes consumed
// by the in-args processed so far and is updated in place.
void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
                                      size_t BasicFrameOffset,
                                      size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    // Load the register-allocated arg from its home stack slot.
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
    _mov(Arg, Mem);
  }
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
433
// Emits the function prolog into Node and computes the stack frame layout:
// pushes the callee-save registers that are actually used, optionally sets
// up an ebp-based frame, reserves LocalsSizeBytes of stack, and assigns
// stack offsets to incoming args and to variables without registers.
void TargetX8632::addProlog(CfgNode *Node) {
  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames. If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks. E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A
  // or B.
  const bool SimpleCoalescing = true;
  size_t InArgsSizeBytes = 0;
  size_t RetIpSizeBytes = 4; // size of the pushed return address
  size_t PreservedRegsSizeBytes = 0;
  LocalsSizeBytes = 0;
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  // Per-node tally of local (single-block) slot bytes; blocks share slots.
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
  // LocalsSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument passed on the stack already has a stack slot.
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg())
          continue;
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > LocalsSizeBytes)
          LocalsSizeBytes = LocalsSize[NodeIndex];
      }
    } else {
      LocalsSizeBytes += Increment;
    }
  }
  LocalsSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      PreservedRegsSizeBytes += 4;
      const bool SuppressStackAdjustment = true;
      _push(getPhysicalRegister(i), SuppressStackAdjustment);
    }
  }

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    Variable *esp = getPhysicalRegister(Reg_esp);
    const bool SuppressStackAdjustment = true;
    _push(ebp, SuppressStackAdjustment);
    _mov(ebp, esp);
  }

  // Generate "sub esp, LocalsSizeBytes"
  if (LocalsSizeBytes)
    _sub(getPhysicalRegister(Reg_esp),
         Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

  resetStackAdjustment();

  // Fill in stack offsets for args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  //
  // TODO: Make this right for different width args, calling
  // conventions, etc. For one thing, args passed in registers will
  // need to be copied/shuffled to their home registers (the
  // RegManager code may have some permutation logic to leverage),
  // and if they have no home register, home space will need to be
  // allocated on the stack to copy into.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += LocalsSizeBytes;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals. This pass mirrors the prepass
  // above, re-tallying the same sizes to assign concrete offsets.
  size_t TotalGlobalsSize = GlobalsSize;
  GlobalsSize = 0;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = 0;
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    if (Var->getIsArg())
      continue;
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg()) {
          // TODO: Make sure Linked has already been assigned a stack
          // slot.
          Var->setStackOffset(Linked->getStackOffset());
          continue;
        }
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
        NextStackOffset = GlobalsSize;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    // Offsets are negative from ebp, or positive from esp.
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset;
  this->HasComputedFrame = true;

  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
                                     << "\n"
                                     << "InArgsSizeBytes=" << InArgsSizeBytes
                                     << "\n"
                                     << "PreservedRegsSizeBytes="
                                     << PreservedRegsSizeBytes << "\n";
  }
}
612
// Emits the function epilog into Node, inserting just before its ret
// instruction: restores esp (through ebp for an ebp-based frame, otherwise
// by adding back LocalsSizeBytes) and pops preserved registers in reverse
// push order. Does nothing if Node contains no ret.
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Search backwards for the ret instruction.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    // mov esp, ebp; pop ebp
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, LocalsSizeBytes
    if (LocalsSizeBytes)
      _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of the
  // pushes in addProlog(). ebp was already popped above if applicable.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}
653
// Traits used by emitConstantPool(): for each primitive FP type, the
// matching unsigned integer type for raw bit access, the Ice constant
// class, the Ice type enum value, and the strings used when emitting
// (type name, assembler data directive, printf format for the raw bits).
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
681
// Emits the constant pool for one FP type, where T is a PoolTypeConverter
// specialization: a mergeable .rodata.cst section header followed by one
// labeled entry per pooled constant, written as raw hex bits with the FP
// value echoed in an assembler comment.
template <typename T> void TargetX8632::emitConstantPool() const {
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
       ++I) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
    typename T::PrimitiveFpType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    // Label format "L$<type>$<id>" identifies the pool entry.
    Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
    Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
710
711void TargetX8632::emitConstants() const {
712 emitConstantPool<PoolTypeConverter<float> >();
713 emitConstantPool<PoolTypeConverter<double> >();
714
715 // No need to emit constants from the int pool since (for x86) they
716 // are embedded as immediates in the instructions.
717}
718
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700719void TargetX8632::split64(Variable *Var) {
720 switch (Var->getType()) {
721 default:
722 return;
723 case IceType_i64:
724 // TODO: Only consider F64 if we need to push each half when
725 // passing as an argument to a function call. Note that each half
726 // is still typed as I32.
727 case IceType_f64:
728 break;
729 }
730 Variable *Lo = Var->getLo();
731 Variable *Hi = Var->getHi();
732 if (Lo) {
733 assert(Hi);
734 return;
735 }
736 assert(Hi == NULL);
737 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
738 Var->getName() + "__lo");
739 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
740 Var->getName() + "__hi");
741 Var->setLoHi(Lo, Hi);
742 if (Var->getIsArg()) {
743 Lo->setIsArg(Func);
744 Hi->setIsArg(Func);
745 }
746}
747
748Operand *TargetX8632::loOperand(Operand *Operand) {
749 assert(Operand->getType() == IceType_i64);
750 if (Operand->getType() != IceType_i64)
751 return Operand;
752 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
753 split64(Var);
754 return Var->getLo();
755 }
756 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
757 uint64_t Mask = (1ull << 32) - 1;
758 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
759 }
760 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
761 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
762 Mem->getOffset(), Mem->getIndex(),
763 Mem->getShift());
764 }
765 llvm_unreachable("Unsupported operand type");
766 return NULL;
767}
768
// Returns an i32 operand for the high-order 32 bits of the i64 Operand:
// the Hi half of a (split) Variable, the upper word of an integer
// constant, or a memory operand rebuilt with its offset advanced by 4.
Operand *TargetX8632::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    // Add 4 to the offset, preserving its constant vs. relocatable form;
    // a missing offset is treated as 0.
    Constant *Offset = Mem->getOffset();
    if (Offset == NULL)
      Offset = Ctx->getConstantInt(IceType_i32, 4);
    else if (ConstantInteger *IntOffset =
                 llvm::dyn_cast<ConstantInteger>(Offset)) {
      Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
    } else if (ConstantRelocatable *SymOffset =
                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
                                   SymOffset->getName());
    }
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
                                   Mem->getIndex(), Mem->getShift());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
798
// Builds a bit vector of physical registers: first sets registers whose
// REGX8632_TABLE attribute matches any Include category, then clears those
// matching any Exclude category (so Exclude wins on overlap).
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(Reg_NUM);

#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[val] = true;                                                     \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[val] = true;                                                     \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[val] = true;                                                     \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[val] = true;                                                     \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[val] = false;                                                    \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[val] = false;                                                    \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[val] = false;                                                    \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
828
829void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
830 IsEbpBasedFrame = true;
831 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
832 // the number of adjustments of esp, etc.
833 Variable *esp = getPhysicalRegister(Reg_esp);
834 Operand *TotalSize = legalize(Inst->getSizeInBytes());
835 Variable *Dest = Inst->getDest();
836 _sub(esp, TotalSize);
837 _mov(Dest, esp);
838}
839
// Lowers a single InstArithmetic.  64-bit integer operations are split
// into paired operations on the 32-bit lo/hi halves, or turned into
// runtime helper calls when no short inline expansion exists (divides
// and remainders).  8/16/32-bit integer operations map onto two-address
// x86 instructions, and f32/f64 operations use scalar SSE instructions,
// with Frem lowered to an fmodf/fmod libc call.
void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    // Split the destination and both sources into 32-bit halves.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = NULL, *T_Hi = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      // The halves are chained through the carry flag: add, then adc.
      _mov(T_Lo, Src0Lo);
      _add(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _adc(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      // Bitwise operations act on the two halves independently.
      _mov(T_Lo, Src0Lo);
      _and(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _and(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _mov(T_Lo, Src0Lo);
      _or(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _or(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _mov(T_Lo, Src0Lo);
      _xor(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _xor(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      // The halves are chained through the borrow flag: sub, then sbb.
      _mov(T_Lo, Src0Lo);
      _sub(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _sbb(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
      Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
      // gcc does the following:
      // a=b*c ==>
      //   t1 = b.hi; t1 *=(imul) c.lo
      //   t2 = c.hi; t2 *=(imul) b.lo
      //   t3:eax = b.lo
      //   t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
      //   a.lo = t4.lo
      //   t4.hi += t1
      //   t4.hi += t2
      //   a.hi = t4.hi
      _mov(T_1, Src0Hi);
      _imul(T_1, Src1Lo);
      _mov(T_2, Src1Hi);
      _imul(T_2, Src0Lo);
      _mov(T_3, Src0Lo, Reg_eax);
      _mul(T_4Lo, T_3, Src1Lo);
      // The mul instruction produces two dest variables, edx:eax.  We
      // create a fake definition of edx to account for this.
      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
      _mov(DestLo, T_4Lo);
      _add(T_4Hi, T_1);
      _add(T_4Hi, T_2);
      _mov(DestHi, T_4Hi);
    } break;
    case InstArithmetic::Shl: {
      // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
      // gcc does the following:
      // a=b<<c ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      // The shift count only uses the bottom 5 bits, so a count >= 32
      // (bit 0x20 set) means the lo half shifts entirely into the hi
      // half and the lo half becomes zero.
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_3));
      _mov(T_3, T_2);
      _mov(T_2, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _mov(T_3, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      // For a count >= 32, the hi half of the result is the sign bit
      // replicated (sar by 31).
      _sar(T_3, SignExtend);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    // 64-bit divides and remainders are lowered as calls to the usual
    // compiler runtime helpers.
    case InstArithmetic::Udiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else { // Dest->getType() != IceType_i64
    Variable *T_edx = NULL;
    Variable *T = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    // The common integer pattern is t=src0; t op= src1; dest=t, which
    // matches the two-address form of the x86 instructions.
    case InstArithmetic::Add:
      _mov(T, Src0);
      _add(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::And:
      _mov(T, Src0);
      _and(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Or:
      _mov(T, Src0);
      _or(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Xor:
      _mov(T, Src0);
      _xor(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Sub:
      _mov(T, Src0);
      _sub(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Mul:
      // TODO: Optimize for llvm::isa<Constant>(Src1)
      // TODO: Strength-reduce multiplications by a constant,
      // particularly -1 and powers of 2.  Advanced: use lea to
      // multiply by 3, 5, 9.
      //
      // The 8-bit version of imul only allows the form "imul r/m8"
      // where T must be in eax.
      if (Dest->getType() == IceType_i8)
        _mov(T, Src0, Reg_eax);
      else
        _mov(T, Src0);
      _imul(T, Src1);
      _mov(Dest, T);
      break;
    // Variable shift counts must live in cl, hence the forced ecx.
    case InstArithmetic::Shl:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shl(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shr(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _sar(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      // div and idiv are the few arithmetic operators that do not allow
      // immediates as the operand.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        // 8-bit div takes its dividend in ax (al=quotient, ah=remainder),
        // so zero ah and put the dividend in al.
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T, Src1, T_ah);
        _mov(Dest, T);
      } else {
        // 32-bit div takes its dividend in edx:eax; zero edx for an
        // unsigned divide.
        Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_edx, Zero, Reg_edx);
        _div(T, Src1, T_edx);
        _mov(Dest, T);
      }
      break;
    case InstArithmetic::Sdiv:
      // idiv does not allow an immediate operand either.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      // cdq sign-extends eax into edx to form the 64-bit dividend.
      _cdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
      break;
    case InstArithmetic::Urem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        // For an 8-bit remainder the result comes back in ah.
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T_ah, Src1, T);
        _mov(Dest, T_ah);
      } else {
        // For a 32-bit remainder the result comes back in edx.
        Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
        _mov(T_edx, Zero, Reg_edx);
        _mov(T, Src0, Reg_eax);
        _div(T_edx, Src1, T);
        _mov(Dest, T_edx);
      }
      break;
    case InstArithmetic::Srem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      // Signed remainder comes back in edx.
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    // Scalar SSE float ops follow the same t=src0; t op= src1 pattern.
    case InstArithmetic::Fadd:
      _mov(T, Src0);
      _addss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fsub:
      _mov(T, Src0);
      _subss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fmul:
      _mov(T, Src0);
      _mulss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fdiv:
      _mov(T, Src0);
      _divss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Frem: {
      // There is no SSE remainder instruction; call fmodf/fmod.
      const SizeT MaxSrcs = 2;
      Type Ty = Dest->getType();
      InstCall *Call =
          makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      return lowerCall(Call);
    } break;
    }
  }
}
1222
1223void TargetX8632::lowerAssign(const InstAssign *Inst) {
1224 Variable *Dest = Inst->getDest();
1225 Operand *Src0 = Inst->getSrc(0);
1226 assert(Dest->getType() == Src0->getType());
1227 if (Dest->getType() == IceType_i64) {
1228 Src0 = legalize(Src0);
1229 Operand *Src0Lo = loOperand(Src0);
1230 Operand *Src0Hi = hiOperand(Src0);
1231 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1232 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1233 Variable *T_Lo = NULL, *T_Hi = NULL;
1234 _mov(T_Lo, Src0Lo);
1235 _mov(DestLo, T_Lo);
1236 _mov(T_Hi, Src0Hi);
1237 _mov(DestHi, T_Hi);
1238 } else {
1239 const bool AllowOverlap = true;
1240 // RI is either a physical register or an immediate.
1241 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
1242 _mov(Dest, RI);
1243 }
1244}
1245
1246void TargetX8632::lowerBr(const InstBr *Inst) {
1247 if (Inst->isUnconditional()) {
1248 _br(Inst->getTargetUnconditional());
1249 } else {
1250 Operand *Src0 = legalize(Inst->getCondition());
1251 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1252 _cmp(Src0, Zero);
1253 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1254 }
1255}
1256
// Lowers a call: pushes arguments right to left, emits the call, pops
// the argument area, models register clobbers for the register
// allocator, and copies the return value out of eax/edx:eax/st(0) as
// appropriate.
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // Push the high half first so the low half ends up at the lower
      // address, matching the in-memory layout of an i64.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions.  This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround.  Force the operand into a (xmm) register, and
      // then push the register.  An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction.  Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    // Select the return-value register(s) implied by the dest type.
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  // The edx half of an i64 result is produced implicitly by the call,
  // so model it with a fake definition for liveness purposes.
  if (edx)
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp to pop the pushed arguments.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction so the register allocator
  // knows all caller-save (scratch) registers are clobbered by the call.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment (and Dest.hi=edx for i64 results).
  if (Dest && eax) {
    if (edx) {
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      // Hint the register allocator toward eax/edx to make the copies
      // removable.
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1368
// Lowers a cast instruction.  Most casts expand to movsx/movzx/cvt plus
// moves; 64-bit conversions with no direct SSE2 support are lowered as
// runtime helper calls, and same-width integer<->FP bitcasts go through
// a stack spill slot.
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand.  If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // A 32-bit source needs a plain mov rather than movsx.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      // The high half is the low half's sign bit replicated (sar 31).
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      // A 32-bit source needs a plain mov rather than movzx.
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask to a single bit so only the boolean value survives.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncation from i64 just takes the low half; narrower truncations
    // are a plain register copy at the destination width.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64 is also reached when Dest is i32 —
      // presumably split64 ignores non-i64 variables; confirm.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    // A same-type bitcast degenerates to a plain assignment.
    if (Dest->getType() == Src0RM->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      // The zero-weight Spill variable is forced onto the stack so the
      // value can be reloaded with the other register class.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      // VariableSplit views address the 32-bit halves of the f64 spill
      // slot.
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // Spill is only ever written through the two VariableSplit stores
      // below, so give it an explicit fake definition.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1643
// Lowers an fcmp by consulting TableFcmp (defined at the top of this
// file) for the condition's operand-swap flag, default result, and up
// to two conditional-branch conditions.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();
  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  // C1 == Br_None means the result is a constant (True/False condition)
  // and no compare is needed at all.
  bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
  if (HasC1) {
    // Force the first ucomiss operand into a physical register.
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // The FakeUse keeps the preceding mov of the default value from
    // being dead-code eliminated despite the later redefinition.
    Context.insert(InstFakeUse::create(Func, Dest));
    Constant *NonDefault =
        Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
    _mov(Dest, NonDefault);
    Context.insert(Label);
  }
}
1690
// Lowers an icmp into a cmp followed by branches that select a 0/1
// result, fusing the compare with an immediately following conditional
// branch when possible.  i64 compares are expanded into lo/hi half
// comparisons driven by TableIcmp64.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch.  This
  // is possible when the compare dest and the branch source operands are the
  // same, and are their only uses.  TODO: implement this optimization for i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      Operand *Src0New =
          legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
      _cmp(Src0New, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      NextBr->setDeleted();
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // (In)equality: compare the halves in sequence.  Any half-mismatch
      // jumps to Label with Dest still holding the "halves differ"
      // answer; falling all the way through rewrites Dest to the
      // "halves equal" answer.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // The FakeUse protects the initial mov from dead-code elimination
      // across the intra-block branches.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // Ordering comparisons: the hi halves decide (conditions C1/C2
      // from TableIcmp64) unless they are equal, in which case the lo
      // halves decide via condition C3.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      // FakeUse keeps the initial "Dest = 1" definition alive.
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  // FakeUse keeps the "Dest = 1" definition alive across the branch.
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1774
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001775namespace {
1776
1777bool isAdd(const Inst *Inst) {
1778 if (const InstArithmetic *Arith =
1779 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
1780 return (Arith->getOp() == InstArithmetic::Add);
1781 }
1782 return false;
1783}
1784
// Iteratively strengthens an x86 addressing mode (Base, Index, Shift,
// Offset) by pattern-matching the SSA definitions of Base and Index:
// assignment chains are followed, Base=Var1+Var2 is split into
// Base/Index, and Index=Var*{1,2,4,8} is folded into the Shift field.
// Offset folding is not yet implemented (see the TODO comments below).
// All four parameters are in/out; the caller detects success by
// checking whether Base changed.
void computeAddressOpt(Variable *&Base, Variable *&Index, int32_t &Shift,
                       int32_t &Offset) {
  (void)Offset; // TODO: pattern-match for non-zero offsets.
  if (Base == NULL)
    return;
  // If the Base has more than one use or is live across multiple
  // blocks, then don't go further.  Alternatively (?), never consider
  // a transformation that would change a variable that is currently
  // *not* live across basic block boundaries into one that *is*.
  if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
    return;

  // Each iteration applies at most one strengthening rule and then
  // restarts; the loop exits when no rule matches.
  while (true) {
    // Base is Base=Var ==>
    //   set Base=Var
    const Inst *BaseInst = Base->getDefinition();
    Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL;
    Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
    // TODO: Helper function for all instances of assignment
    // transitivity.
    if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
        // TODO: ensure BaseVariable0 stays single-BB
        true) {
      Base = BaseVariable0;
      continue;
    }

    // Index is Index=Var ==>
    //   set Index=Var

    // Index==NULL && Base is Base=Var1+Var2 ==>
    //   set Base=Var1, Index=Var2, Shift=0
    Operand *BaseOperand1 =
        BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
    Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
    if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
        // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
        true) {
      Base = BaseVariable0;
      Index = BaseVariable1;
      Shift = 0; // should already have been 0
      continue;
    }

    // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)
    const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
    if (const InstArithmetic *ArithInst =
            llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
      Operand *IndexOperand0 = ArithInst->getSrc(0);
      Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
      Operand *IndexOperand1 = ArithInst->getSrc(1);
      ConstantInteger *IndexConstant1 =
          llvm::dyn_cast<ConstantInteger>(IndexOperand1);
      if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
          IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
        uint64_t Mult = IndexConstant1->getValue();
        uint32_t LogMult;
        // Only multipliers 1/2/4/8 are encodable in an x86 scale
        // field; any other value maps to LogMult=4, which fails the
        // Shift+LogMult<=3 test below.
        switch (Mult) {
        case 1:
          LogMult = 0;
          break;
        case 2:
          LogMult = 1;
          break;
        case 4:
          LogMult = 2;
          break;
        case 8:
          LogMult = 3;
          break;
        default:
          LogMult = 4;
          break;
        }
        if (Shift + LogMult <= 3) {
          Index = IndexVariable0;
          Shift += LogMult;
          continue;
        }
      }
    }

    // Index is Index=Var<<Const && Const+Shift<=3 ==>
    //   Index=Var, Shift+=Const

    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)

    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
    //   swap(Index,Base)
    // Similar for Base=Const*Var and Base=Var<<Const

    // Base is Base=Var+Const ==>
    //   set Base=Var, Offset+=Const

    // Base is Base=Const+Var ==>
    //   set Base=Var, Offset+=Const

    // Base is Base=Var-Const ==>
    //   set Base=Var, Offset-=Const

    // Index is Index=Var+Const ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Const+Var ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Var-Const ==>
    //   set Index=Var, Offset-=(Const<<Shift)

    // TODO: consider overflow issues with respect to Offset.
    // TODO: handle symbolic constants.
    break;
  }
}
1901
1902} // anonymous namespace
1903
// Lowers a load by turning it into an Assign whose source is an
// OperandX8632Mem, after first trying to fuse the load into an
// immediately-following Arithmetic instruction that consumes the
// loaded value.
void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand.  Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = Inst->getSourceAddress();
  // Address mode optimization already creates an OperandX8632Mem
  // operand, so it doesn't need another level of transformation.
  if (!llvm::isa<OperandX8632Mem>(Src0)) {
    // The raw address is either a Variable or a Constant (a global
    // symbol), so exactly one of Base/Offset is non-NULL.
    Variable *Base = llvm::dyn_cast<Variable>(Src0);
    Constant *Offset = llvm::dyn_cast<Constant>(Src0);
    assert(Base || Offset);
    Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
  }

  // Fuse this load with a subsequent Arithmetic instruction in the
  // following situations:
  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
  //
  // TODO: Clean up and test thoroughly.
  //
  // TODO: Why limit to Arithmetic instructions?  This could probably be
  // applied to most any instruction type.  Look at all source operands
  // in the following instruction, and if there is one instance of the
  // load instruction's dest variable, and that instruction ends that
  // variable's live range, then make the substitution.  Deal with
  // commutativity optimization in the arithmetic instruction lowering.
  InstArithmetic *NewArith = NULL;
  if (InstArithmetic *Arith =
          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
    Variable *DestLoad = Inst->getDest();
    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
        DestLoad != Src0Arith) {
      // Loaded value is the second operand: c=b+a ==> c=b+[mem].
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(0), Src0);
    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
      // Loaded value is the first operand; the rewrite swaps the
      // operands, so the operation must be commutative.
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(1), Src0);
    }
    if (NewArith) {
      // The original arithmetic instruction is replaced by the fused
      // one; delete it and skip past it in the lowering context.
      Arith->setDeleted();
      Context.advanceNext();
      lowerArithmetic(NewArith);
      return;
    }
  }

  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}
1960
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001961void TargetX8632::doAddressOptLoad() {
1962 Inst *Inst = *Context.getCur();
1963 Variable *Dest = Inst->getDest();
1964 Operand *Addr = Inst->getSrc(0);
1965 Variable *Index = NULL;
1966 int32_t Shift = 0;
1967 int32_t Offset = 0; // TODO: make Constant
1968 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1969 computeAddressOpt(Base, Index, Shift, Offset);
1970 if (Base && Addr != Base) {
1971 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
1972 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
1973 Shift);
1974 Inst->setDeleted();
1975 Context.insert(InstLoad::create(Func, Dest, Addr));
1976 }
1977}
1978
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001979void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
1980 Func->setError("Phi found in regular instruction list");
1981}
1982
// Lowers a return: moves the return value (if any) into its
// ABI-mandated location -- edx:eax for i64, the x87 stack top for
// f32/f64, eax for other integer types -- and emits the ret.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      // 64-bit values are returned in the edx:eax register pair.
      Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
      Reg = eax;
      // Fake use keeps edx live through the ret.
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (Src0->getType() == IceType_f32 ||
               Src0->getType() == IceType_f64) {
      // Floating-point results are returned on the x87 stack via fld.
      _fld(Src0);
    } else {
      // Remaining (integer) types are returned in eax; _mov assigns
      // Reg as a side effect.
      _mov(Reg, Src0, Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function.  Otherwise post-call esp adjustments get dead-code
  // eliminated.  TODO: Are there more places where the fake use
  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
2008
// Lowers a select using a compare-and-conditional-branch pattern:
//   a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
// The FakeUse prevents the first assignment to the dest from being
// dead-code eliminated, since only one of the two moves executes.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // 64-bit select: apply the same pattern to the lo and hi halves.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // Fake uses keep the true-operand moves alive across the branch.
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
2047
// Lowers a store, splitting a 64-bit value into two 32-bit stores of
// the hi and lo halves.
void TargetX8632::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
  // Address mode optimization already creates an OperandX8632Mem
  // operand, so it doesn't need another level of transformation.
  if (!NewAddr) {
    // The address will be either a constant (which represents a global
    // variable) or a variable, so either the Base or Offset component
    // of the OperandX8632Mem will be set.
    Variable *Base = llvm::dyn_cast<Variable>(Addr);
    Constant *Offset = llvm::dyn_cast<Constant>(Addr);
    assert(Base || Offset);
    NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
  }
  // legalize() ensures the address's Base/Index components are in
  // physical registers.
  NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));

  if (NewAddr->getType() == IceType_i64) {
    // 64-bit store: emit separate stores for the hi and lo halves.
    Value = legalize(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
    _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
    _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm, true);
    _store(Value, NewAddr);
  }
}
2076
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002077void TargetX8632::doAddressOptStore() {
2078 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
2079 Operand *Data = Inst->getData();
2080 Operand *Addr = Inst->getAddr();
2081 Variable *Index = NULL;
2082 int32_t Shift = 0;
2083 int32_t Offset = 0; // TODO: make Constant
2084 Variable *Base = llvm::dyn_cast<Variable>(Addr);
2085 computeAddressOpt(Base, Index, Shift, Offset);
2086 if (Base && Addr != Base) {
2087 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
2088 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
2089 Shift);
2090 Inst->setDeleted();
2091 Context.insert(InstStore::create(Func, Data, Addr));
2092 }
2093}
2094
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002095void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
2096 // This implements the most naive possible lowering.
2097 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
2098 Operand *Src0 = Inst->getComparison();
2099 SizeT NumCases = Inst->getNumCases();
2100 // OK, we'll be slightly less naive by forcing Src into a physical
2101 // register if there are 2 or more uses.
2102 if (NumCases >= 2)
2103 Src0 = legalizeToVar(Src0, true);
2104 else
2105 Src0 = legalize(Src0, Legal_All, true);
2106 for (SizeT I = 0; I < NumCases; ++I) {
2107 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
2108 _cmp(Src0, Value);
2109 _br(InstX8632Br::Br_e, Inst->getLabel(I));
2110 }
2111
2112 _br(Inst->getLabelDefault());
2113}
2114
2115void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
2116 const SizeT MaxSrcs = 0;
2117 Variable *Dest = NULL;
2118 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
2119 lowerCall(Call);
2120}
2121
// Legalizes From into an operand kind permitted by the Allowed mask
// (register / immediate / memory), emitting mov instructions as
// needed.  If RegNum is specified, the result is forced into that
// exact physical register.  AllowOverlap is forwarded to
// setPreferredRegister when a fresh register is created for a
// Variable, allowing the new register's live range to overlap the
// source's.  Returns the legalized operand (possibly From unchanged).
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register.  If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds.  (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    // Rebuild the Mem operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    if (!(Allowed & Legal_Mem)) {
      // A memory operand is not allowed here; load it into a register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (!(Allowed & Legal_Imm)) {
      // An immediate is not allowed here; materialize it in a register.
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
2186
2187// Provide a trivial wrapper to legalize() for this common usage.
2188Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
2189 int32_t RegNum) {
2190 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
2191}
2192
2193Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
2194 Variable *Reg = Func->makeVariable(Type, Context.getNode());
2195 if (RegNum == Variable::NoRegister)
2196 Reg->setWeightInfinite();
2197 else
2198 Reg->setRegNum(RegNum);
2199 return Reg;
2200}
2201
// Performs the simplest possible register allocation for the -Om1
// ("fast") configuration, in two passes over the current context's
// instructions: the first pass black-lists registers already
// pre-colored by lowering; the second greedily assigns the remaining
// infinite-weight variables to still-available registers.
void TargetX8632::postLower() {
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers.  TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    // FakeKill instructions are skipped in this pass; presumably
    // their register operands shouldn't poison the white list --
    // NOTE(review): confirm against InstFakeKill semantics.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        Variable *Var = Src->getVar(J);
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        // Restrict candidates to registers valid for this type.
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
2263
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002264template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
2265 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothf61d5b22014-05-23 13:31:24 -07002266 // It would be better to prefix with ".L$" instead of "L$", but
2267 // llvm-mc doesn't parse "dword ptr [.L$foo]".
2268 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
2269}
2270
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002271template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
2272 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothf61d5b22014-05-23 13:31:24 -07002273 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
2274}
2275
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002276} // end of namespace Ice