//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction.  It also implements
// TargetX8632::postLower(), which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//

#include "IceDefs.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceInstX8632.h"
#include "IceOperand.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLoweringX8632.h"

namespace Ice {

namespace {

// The following table summarizes the logic for lowering the fcmp
// instruction.  There is one table entry for each of the 16 conditions.
//
// The first four columns describe the case when the operands are
// floating point scalar values.  A comment in lowerFcmp() describes the
// lowering template.  In the most general case, there is a compare
// followed by two conditional branches, because some fcmp conditions
// don't map to a single x86 conditional branch.  However, in many cases
// it is possible to swap the operands in the comparison and have a
// single conditional branch.  Since it's quite tedious to validate the
// table by hand, good execution tests are helpful.
//
// The last two columns describe the case when the operands are vectors
// of floating point values.  For most fcmp conditions, there is a clear
// mapping to a single x86 cmpps instruction variant.  Some fcmp
// conditions require special code to handle and these are marked in the
// table with a Cmpps_Invalid predicate.
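//
// As a sketch (the authoritative template is described in a comment in
// lowerFcmp()), the most general scalar case "a = fcmp <cond> b, c"
// compiles to roughly:
//   ucomiss b, c   /* compare */
//   j<C1> <true>   /* only if C1 != Br_None */
//   j<C2> <true>   /* only if C2 != Br_None */
// with b and c swapped beforehand when SwapScalarOperands is set.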
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapScalarOperands;
  InstX8632::BrCond C1, C2;
  bool SwapVectorOperands;
  InstX8632Cmpps::CmppsCond Predicate;
} TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  {                                                                            \
    dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \
  }                                                                            \
  ,
    FCMPX8632_TABLE
#undef X
};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types.  Each icmp condition has a clear mapping to an
// x86 conditional branch instruction.

const struct TableIcmp32_ {
  InstX8632::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C_32 }                                                        \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
// conditional branches are needed.  For the other conditions, three separate
// conditional branches are needed.
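// For example (a sketch; the authoritative conditions are the C1_64,
// C2_64, and C3_64 entries of ICMPX8632_TABLE in
// IceTargetLoweringX8632.def), an unsigned i64 less-than can compare
// the high words and branch on "jb" (true) then "ja" (false), and
// finally compare the low words and branch on "jb".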
const struct TableIcmp64_ {
  InstX8632::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 }               \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

const struct TableTypeX8632Attributes_ {
  Type InVectorElementType;
} TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width)                              \
  { elementty }                                                                \
  ,
    ICETYPEX8632_TABLE
#undef X
};
const size_t TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);

// Return the type which the elements of the vector have in the X86
// representation of the vector.
Type getInVectorElementType(Type Ty) {
  assert(isVectorType(Ty));
  size_t Index = static_cast<size_t>(Ty);
  assert(Index < TableTypeX8632AttributesSize);
  return TableTypeX8632Attributes[Index].InVectorElementType;
}

// The maximum number of arguments to pass in XMM registers
const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const unsigned X86_CHAR_BIT = 8;

// Return a string representation of the type that is suitable for use
// in an identifier.
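// For example, typeIdentString(IceType_v4i32) produces "v4i32"
// (assuming Type's stream operator prints the usual short names),
// matching the Sz_* runtime helper names built in lowerArithmetic()
// below.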
IceString typeIdentString(const Type Ty) {
  IceString Str;
  llvm::raw_string_ostream BaseOS(Str);
  Ostream OS(&BaseOS);
  if (isVectorType(Ty)) {
    OS << "v" << typeNumElements(Ty) << typeElementType(Ty);
  } else {
    OS << Ty;
  }
  return BaseOS.str();
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers.  There is a risk that the tables
// could get out of sync if enum values are reordered or if entries
// are added or deleted.  This dummy function uses static_assert to
// ensure everything is kept in sync.
void xMacroIntegrityCheck() {
  // Validate the enum values in FCMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
      FCMPX8632_TABLE
#undef X
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
    ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTFCMP_TABLE;
#undef X
  }

  // Validate the enum values in ICMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
      ICMPX8632_TABLE
#undef X
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
    ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTICMP_TABLE;
#undef X
  }

  // Validate the enum values in ICETYPEX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
      ICETYPEX8632_TABLE
#undef X
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, elts, elty, str)                                   \
  static const int _table1_##tag = tag;
    ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width)                              \
  static const int _table2_##tag = _tmp_##tag;                                 \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, size, align, elts, elty, str)                                   \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPE_TABLE;
#undef X
  }
}

} // end of anonymous namespace

TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(Reg_NUM)) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Reg_NUM);
  llvm::SmallBitVector VectorRegisters(Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Reg_NUM);
  ScratchRegs.resize(Reg_NUM);
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[val] = isInt;                                               \
  IntegerRegistersI8[val] = isI8;                                              \
  FloatRegisters[val] = isFP;                                                  \
  VectorRegisters[val] = isFP;                                                 \
  ScratchRegs[val] = scratch;
  REGX8632_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetX8632::translateO2() {
  GlobalContext *Context = Func->getContext();

  // Lower Phi instructions.
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  Func->dump("After Phi lowering");

  // Address mode optimization.
  Timer T_doAddressOpt;
  Func->doAddressOpt();
  T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");

  // Argument lowering
  Timer T_argLowering;
  Func->doArgLowering();
  T_argLowering.printElapsedUs(Context, "lowerArguments()");

  // Target lowering.  This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing.  If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first.  TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Timer T_renumber1;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber1.printElapsedUs(Context, "renumberInstructions()");

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight().  However, for some
  // reason that slows down the rest of the translation.  Investigate.
  Timer T_liveness1;
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  T_liveness1.printElapsedUs(Context, "liveness()");
  Func->dump("After x86 address mode opt");

  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");

  // Register allocation.  This requires instruction renumbering and
  // full liveness analysis.
  Timer T_renumber2;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber2.printElapsedUs(Context, "renumberInstructions()");
  Timer T_liveness2;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  T_liveness2.printElapsedUs(Context, "liveness()");
  // Validate the live range computations.  Do it outside the timing
  // code.  TODO: Put this under a flag.
  bool ValidLiveness = Func->validateLiveness();
  assert(ValidLiveness);
  (void)ValidLiveness; // used only in assert()
  ComputedLiveRanges = true;
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Timer T_regAlloc;
  regAlloc();
  if (Func->hasError())
    return;
  T_regAlloc.printElapsedUs(Context, "regAlloc()");
  Func->dump("After linear scan regalloc");

  // Stack frame mapping.
  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  Func->dump("After stack frame mapping");
}

void TargetX8632::translateOm1() {
  GlobalContext *Context = Func->getContext();
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  Func->dump("After Phi lowering");

  Timer T_argLowering;
  Func->doArgLowering();
  T_argLowering.printElapsedUs(Context, "lowerArguments()");

  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");
  Func->dump("After initial x8632 codegen");

  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  Func->dump("After stack frame mapping");
}

IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};

Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
  assert(RegNum < PhysicalRegisters.size());
  Variable *Reg = PhysicalRegisters[RegNum];
  if (Reg == NULL) {
    CfgNode *Node = NULL; // NULL means multi-block lifetime
    Reg = Func->makeVariable(IceType_i32, Node);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[RegNum] = Reg;
  }
  return Reg;
}

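// Return the assembler name of a register, adjusted for the operand
// type: e.g., given the usual REGX8632_TABLE entries, Reg_eax is named
// "al" for i1/i8, "ax" for i16, and "eax" for wider types.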
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < Reg_NUM);
  static IceString RegNames8[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
    REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
    REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}

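// Emit a variable either as its register name or as a width-qualified
// stack reference along the lines of "dword ptr [esp+12]" (the exact
// width string comes from InstX8632::getWidthString(), and esp-based
// offsets are corrected by the current stack adjustment).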
void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
  Ostream &Str = Ctx->getStrEmit();
  assert(Var->getLocalUseNode() == NULL ||
         Var->getLocalUseNode() == Func->getCurrentNode());
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  Str << InstX8632::getWidthString(Var->getType());
  Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  if (Offset) {
    if (Offset > 0)
      Str << "+";
    Str << Offset;
  }
  Str << "]";
}

void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
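  // For example, for f(i32 a, <4 x i32> b, i32 c, <4 x i32> d), b is
  // assigned xmm0 and d is assigned xmm1, while a and c keep their
  // stack locations.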
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
       ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register.  Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    IceString Name = "home_reg:" + Arg->getName();
    const CfgNode *DefNode = NULL;
    Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg(Func);
    Arg->setIsArg(Func, false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack.  This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width.  For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture.  Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
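// For example (a sketch), an i64 argument whose halves both live on the
// stack gets its Lo component at BasicFrameOffset + InArgsSizeBytes and
// its Hi component 4 bytes above that, advancing InArgsSizeBytes by 8
// in total.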
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
  }
}

Type TargetX8632::stackSlotType() { return IceType_i32; }

void TargetX8632::addProlog(CfgNode *Node) {
  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames.  If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks.  E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A
  // or B.
  const bool SimpleCoalescing = true;
  size_t InArgsSizeBytes = 0;
  size_t RetIpSizeBytes = 4;
  size_t PreservedRegsSizeBytes = 0;
  LocalsSizeBytes = 0;
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment.  This can be done as one variable per stack
  // slot.  Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass.  Compute RegsUsed, PreservedRegsSizeBytes, and
  // LocalsSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a
    // register) or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg())
          continue;
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > LocalsSizeBytes)
          LocalsSizeBytes = LocalsSize[NodeIndex];
      }
    } else {
      LocalsSizeBytes += Increment;
    }
  }
  LocalsSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      PreservedRegsSizeBytes += 4;
      const bool SuppressStackAdjustment = true;
      _push(getPhysicalRegister(i), SuppressStackAdjustment);
    }
  }

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    Variable *esp = getPhysicalRegister(Reg_esp);
    const bool SuppressStackAdjustment = true;
    _push(ebp, SuppressStackAdjustment);
    _mov(ebp, esp);
  }

  // Generate "sub esp, LocalsSizeBytes"
  if (LocalsSizeBytes)
    _sub(getPhysicalRegister(Reg_esp),
         Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

  resetStackAdjustment();

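  // At this point the frame has been set up.  A sketch of the layout
  // for the ebp-based case, from higher to lower addresses:
  //   incoming stack arguments
  //   return address
  //   preserved registers (pushed above)
  //   saved ebp                <-- ebp
  //   locals and spill slots   <-- esp
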
  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated.  Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += LocalsSizeBytes;

  unsigned NumXmmArgs = 0;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  size_t TotalGlobalsSize = GlobalsSize;
  GlobalsSize = 0;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = 0;
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    if (Var->getIsArg())
      continue;
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg()) {
          // TODO: Make sure Linked has already been assigned a stack
          // slot.
          Var->setStackOffset(Linked->getStackOffset());
          continue;
        }
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
        NextStackOffset = GlobalsSize;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset;
  this->HasComputedFrame = true;

  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
                                     << "\n"
                                     << "InArgsSizeBytes=" << InArgsSizeBytes
                                     << "\n"
                                     << "PreservedRegsSizeBytes="
                                     << PreservedRegsSizeBytes << "\n";
  }
}

void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, LocalsSizeBytes
    if (LocalsSizeBytes)
      _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}

template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";

template <typename T> void TargetX8632::emitConstantPool() const {
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

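  // The emitted pool looks roughly like the following for a pooled
  // float 1.0 (illustrative; entry IDs are assigned by GlobalContext):
  //   .section .rodata.cst4,"aM",@progbits,4
  //   .align 4
  //   L$f32$0:
  //   .long 0x3f800000 # float 1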
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
       ++I) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
    typename T::PrimitiveFpType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
    Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}

void TargetX8632::emitConstants() const {
  emitConstantPool<PoolTypeConverter<float> >();
  emitConstantPool<PoolTypeConverter<double> >();

  // No need to emit constants from the int pool since (for x86) they
  // are embedded as immediates in the instructions.
}

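// Lazily split a 64-bit variable into 32-bit Lo and Hi halves; e.g., a
// variable "a" gets i32 companions "a__lo" and "a__hi".  (F64 variables
// are also split so each half can be pushed when passed as a call
// argument; see the TODO below.)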
void TargetX8632::split64(Variable *Var) {
  switch (Var->getType()) {
  default:
    return;
  case IceType_i64:
    // TODO: Only consider F64 if we need to push each half when
    // passing as an argument to a function call.  Note that each half
    // is still typed as I32.
  case IceType_f64:
    break;
  }
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == NULL);
  Lo = Func->makeVariable(IceType_i32, Context.getNode(),
                          Var->getName() + "__lo");
  Hi = Func->makeVariable(IceType_i32, Context.getNode(),
                          Var->getName() + "__hi");
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg(Func);
    Hi->setIsArg(Func);
  }
}

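// Return the low-order half of a 64-bit operand: the Lo variable of a
// split64() pair, the low 32 bits of a constant, or a rebuilt memory
// operand.  For example (a sketch), a memory operand [ebp+8] yields
// [ebp+8] here and [ebp+12] from hiOperand() below.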
Operand *TargetX8632::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    uint64_t Mask = (1ull << 32) - 1;
    return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
                                   Mem->getOffset(), Mem->getIndex(),
                                   Mem->getShift(), Mem->getSegmentRegister());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}

Operand *TargetX8632::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    Constant *Offset = Mem->getOffset();
    if (Offset == NULL)
      Offset = Ctx->getConstantInt(IceType_i32, 4);
    else if (ConstantInteger *IntOffset =
                 llvm::dyn_cast<ConstantInteger>(Offset)) {
      Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
    } else if (ConstantRelocatable *SymOffset =
                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
                                   SymOffset->getName());
    }
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
                                   Mem->getIndex(), Mem->getShift(),
                                   Mem->getSegmentRegister());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}

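// Compute the set of physical registers selected by Include and then
// pruned by Exclude.  For example, getRegisterSet(RegSet_CalleeSave,
// RegSet_None) yields the preserved registers, as used in addProlog()
// and addEpilog() above.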
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(Reg_NUM);

#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[val] = true;                                                     \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[val] = true;                                                     \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[val] = true;                                                     \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[val] = true;                                                     \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[val] = false;                                                    \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[val] = false;                                                    \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[val] = false;                                                    \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}

void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
  IsEbpBasedFrame = true;
  // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
  // the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  _sub(esp, TotalSize);
  _mov(Dest, esp);
}

void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = NULL, *T_Hi = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      _mov(T_Lo, Src0Lo);
      _add(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _adc(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      _mov(T_Lo, Src0Lo);
      _and(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _and(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _mov(T_Lo, Src0Lo);
      _or(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _or(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _mov(T_Lo, Src0Lo);
      _xor(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _xor(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      _mov(T_Lo, Src0Lo);
      _sub(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _sbb(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
      Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
      // gcc does the following:
      // a=b*c ==>
      //   t1 = b.hi; t1 *=(imul) c.lo
      //   t2 = c.hi; t2 *=(imul) b.lo
      //   t3:eax = b.lo
      //   t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
      //   a.lo = t4.lo
      //   t4.hi += t1
      //   t4.hi += t2
      //   a.hi = t4.hi
      _mov(T_1, Src0Hi);
      _imul(T_1, Src1Lo);
      _mov(T_2, Src1Hi);
      _imul(T_2, Src0Lo);
      _mov(T_3, Src0Lo, Reg_eax);
      _mul(T_4Lo, T_3, Src1Lo);
      // The mul instruction produces two dest variables, edx:eax.  We
      // create a fake definition of edx to account for this.
      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
      _mov(DestLo, T_4Lo);
      _add(T_4Hi, T_1);
      _add(T_4Hi, T_2);
      _mov(DestHi, T_4Hi);
    } break;
    case InstArithmetic::Shl: {
      // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
      // gcc does the following:
      // a=b<<c ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_3));
      _mov(T_3, T_2);
      _mov(T_2, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _mov(T_3, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_2 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _sar(T_3, SignExtend);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Udiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else if (isVectorType(Dest->getType())) {
    // TODO: Trap on integer divide and integer modulo by zero.
    // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
    //
    // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
    // registers.  This is a workaround of the fact that there is no
    // support for aligning stack operands.  Once there is support,
    // remove LEGAL_HACK.
#define LEGAL_HACK(s) legalizeToVar((s))
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _padd(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::And: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pand(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Or: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _por(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pxor(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _psub(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      if (Dest->getType() == IceType_v4i32) {
        // Lowering sequence:
        // Note: The mask arguments have index 0 on the left.
        //
        // movups  T1, Src0
        // pshufd  T2, Src0, {1,0,3,0}
        // pshufd  T3, Src1, {1,0,3,0}
        // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
        // pmuludq T1, Src1
        // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
        // pmuludq T2, T3
        // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
        // shufps  T1, T2, {0,2,0,2}
        // pshufd  T4, T1, {0,2,1,3}
        // movups  Dest, T4
        //
        // TODO(wala): SSE4.1 has pmulld.

        // Mask that directs pshufd to create a vector with entries
        // Src[1, 0, 3, 0]
        const unsigned Constant1030 = 0x31;
        Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
        // Mask that directs shufps to create a vector with entries
        // Dest[0, 2], Src[0, 2]
        const unsigned Mask0202 = 0x88;
        // Mask that directs pshufd to create a vector with entries
        // Src[0, 2, 1, 3]
        const unsigned Mask0213 = 0xd8;
        Variable *T1 = makeReg(IceType_v4i32);
        Variable *T2 = makeReg(IceType_v4i32);
        Variable *T3 = makeReg(IceType_v4i32);
        Variable *T4 = makeReg(IceType_v4i32);
        _movp(T1, Src0);
        // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R
        // with Src1 after stack operand alignment support is
        // implemented.
        Variable *Src0R = LEGAL_HACK(Src0);
        Variable *Src1R = LEGAL_HACK(Src1);
        _pshufd(T2, Src0R, Mask1030);
        _pshufd(T3, Src1R, Mask1030);
        _pmuludq(T1, Src1R);
        _pmuludq(T2, T3);
        _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
        _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
        _movp(Dest, T4);
      } else if (Dest->getType() == IceType_v8i16) {
        Variable *T = makeReg(IceType_v8i16);
        _movp(T, Src0);
        _pmullw(T, legalizeToVar(Src1));
        _movp(Dest, T);
      } else {
        assert(Dest->getType() == IceType_v16i8);
        // Sz_mul_v16i8
        const IceString Helper = "Sz_mul_v16i8";
        const SizeT MaxSrcs = 2;
        InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
        Call->addArg(Src0);
        Call->addArg(Src1);
        lowerCall(Call);
      }
    } break;
    case InstArithmetic::Shl: {
      // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
      const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Lshr: {
      // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
      const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Ashr: {
      // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
      const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Udiv: {
      // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
      const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
      const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
      const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
      const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _addps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fsub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _subps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fmul: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _mulps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fdiv: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _divps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Frem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    }
#undef LEGAL_HACK
  } else { // Dest->getType() is non-i64 scalar
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001390 Variable *T_edx = NULL;
1391 Variable *T = NULL;
1392 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001393 case InstArithmetic::_num:
1394 llvm_unreachable("Unknown arithmetic operator");
1395 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001396 case InstArithmetic::Add:
1397 _mov(T, Src0);
1398 _add(T, Src1);
1399 _mov(Dest, T);
1400 break;
1401 case InstArithmetic::And:
1402 _mov(T, Src0);
1403 _and(T, Src1);
1404 _mov(Dest, T);
1405 break;
1406 case InstArithmetic::Or:
1407 _mov(T, Src0);
1408 _or(T, Src1);
1409 _mov(Dest, T);
1410 break;
1411 case InstArithmetic::Xor:
1412 _mov(T, Src0);
1413 _xor(T, Src1);
1414 _mov(Dest, T);
1415 break;
1416 case InstArithmetic::Sub:
1417 _mov(T, Src0);
1418 _sub(T, Src1);
1419 _mov(Dest, T);
1420 break;
1421 case InstArithmetic::Mul:
1422 // TODO: Optimize for llvm::isa<Constant>(Src1)
1423 // TODO: Strength-reduce multiplications by a constant,
1424 // particularly -1 and powers of 2. Advanced: use lea to
1425 // multiply by 3, 5, 9.
1426 //
1427 // The 8-bit version of imul only allows the form "imul r/m8"
1428 // where T must be in eax.
1429 if (Dest->getType() == IceType_i8)
1430 _mov(T, Src0, Reg_eax);
1431 else
1432 _mov(T, Src0);
1433 _imul(T, Src1);
1434 _mov(Dest, T);
1435 break;
1436 case InstArithmetic::Shl:
1437 _mov(T, Src0);
1438 if (!llvm::isa<Constant>(Src1))
1439 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1440 _shl(T, Src1);
1441 _mov(Dest, T);
1442 break;
1443 case InstArithmetic::Lshr:
1444 _mov(T, Src0);
1445 if (!llvm::isa<Constant>(Src1))
1446 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1447 _shr(T, Src1);
1448 _mov(Dest, T);
1449 break;
1450 case InstArithmetic::Ashr:
1451 _mov(T, Src0);
1452 if (!llvm::isa<Constant>(Src1))
1453 Src1 = legalizeToVar(Src1, false, Reg_ecx);
1454 _sar(T, Src1);
1455 _mov(Dest, T);
1456 break;
1457 case InstArithmetic::Udiv:
Jan Voung70d68832014-06-17 10:02:37 -07001458 // div and idiv are the few arithmetic operators that do not allow
1459 // immediates as the operand.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantZero(IceType_i8);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T, Src1, T_ah);
        _mov(Dest, T);
      } else {
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(T, Src0, Reg_eax);
        _mov(T_edx, Zero, Reg_edx);
        _div(T, Src1, T_edx);
        _mov(Dest, T);
      }
      break;
    case InstArithmetic::Sdiv:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
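      // Sketch of the emitted sequence:
      //   mov eax, Src0; cdq; idiv Src1; mov Dest, eax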
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
      break;
    case InstArithmetic::Urem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantZero(IceType_i8);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T_ah, Src1, T);
        _mov(Dest, T_ah);
      } else {
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(T_edx, Zero, Reg_edx);
        _mov(T, Src0, Reg_eax);
        _div(T_edx, Src1, T);
        _mov(Dest, T_edx);
      }
      break;
    case InstArithmetic::Srem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
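      // Like Sdiv, but the remainder is taken from edx (sketch):
      //   mov eax, Src0; cdq; idiv Src1; mov Dest, edx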
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    case InstArithmetic::Fadd:
      _mov(T, Src0);
      _addss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fsub:
      _mov(T, Src0);
      _subss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fmul:
      _mov(T, Src0);
      _mulss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fdiv:
      _mov(T, Src0);
      _divss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Frem: {
      const SizeT MaxSrcs = 2;
      Type Ty = Dest->getType();
      InstCall *Call =
          makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      return lowerCall(Call);
    } break;
    }
  }
}

void TargetX8632::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
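    // The 64-bit assignment is split into two 32-bit moves, each through
    // a temporary (sketch):
    //   mov t_lo, src.lo; mov dest.lo, t_lo
    //   mov t_hi, src.hi; mov dest.hi, t_hi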
    Variable *T_Lo = NULL, *T_Hi = NULL;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    const bool AllowOverlap = true;
    // RI is either a physical register or an immediate.
    Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
    if (isVectorType(Dest->getType()))
      _movp(Dest, RI);
    else
      _mov(Dest, RI);
  }
}

void TargetX8632::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
  } else {
    Operand *Src0 = legalize(Inst->getCondition());
    Constant *Zero = Ctx->getConstantZero(IceType_i32);
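    // Sketch of the lowering:
    //   cmp Src0, 0; jne TargetTrue; jmp TargetFalse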
    _cmp(Src0, Zero);
    _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
  }
}

void TargetX8632::lowerCall(const InstCall *Instr) {
  // Classify each argument operand according to the location where the
  // argument is passed.
  OperandList XmmArgs;
  OperandList StackArgs;
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
    }
  }
  // For stack arguments, generate a sequence of push instructions,
  // pushing right to left, keeping track of stack offsets in case a
  // push involves a stack operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: Consolidate the stack adjustment for function calls by
  // reserving enough space for the arguments only once.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (OperandList::reverse_iterator I = StackArgs.rbegin(),
                                     E = StackArgs.rend(); I != E; ++I) {
    Operand *Arg = legalize(*I);
    if (Arg->getType() == IceType_i64) {
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
      // If the Arg turns out to be a memory operand, more than one push
      // instruction is required. This ends up being somewhat clumsy in
      // the current IR, so we use a workaround. Force the operand into
      // a (xmm) register, and then push the register. An xmm register
      // push is actually not possible in x86, but the Push instruction
      // emitter handles this by decrementing the stack pointer and
      // directly writing the xmm register value.
      _push(legalize(Arg, Legal_Reg));
    } else {
      // Otherwise, PNaCl requires parameter types to be at least 32 bits.
      assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToVar(XmmArgs[i], false, Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = NULL;
  Variable *ReturnRegHi = NULL;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), Reg_xmm0);
      break;
    }
  }
  // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
  // a proper emitter is used.
  Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
  Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(ReturnReg, false);
      DestHi->setPreferredRegister(ReturnRegHi, false);
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      Dest->setPreferredRegister(ReturnReg, false);
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
  }
}

void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that
    // the optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // width = width(elty) - 1; dest = (src << width) >> width
        SizeT ShiftAmount =
            X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
        Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = Inst->getSrc(0);
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = NULL;
      _mov(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers. SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word. This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM);
      _movp(Dest, T);
    } else if (Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               Src0->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64. Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _movq(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      Src0 = legalize(Src0);
      assert(Src0->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   FakeDef(s.f64)
      //   lo(s.f64) = t_lo.i32
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0));
      // Technically, the Spill is defined after the _store happens, but
      // SpillLo is considered a "use" of Spill so define Spill before it
      // is used.
      Context.insert(InstFakeDef::create(Func, Spill));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0));
      _store(T_Hi, SpillHi);
      _movq(Dest, Spill);
    } break;
    case IceType_v8i1: {
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType(),
                                               Context.getNode());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType(),
                                               Context.getNode());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      _movp(Dest, legalizeToVar(Src0));
    } break;
    }
    break;
  }
  }
}

void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
  Operand *SourceVectOperand = Inst->getSrc(0);
  ConstantInteger *ElementIndex =
      llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectOperand->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);
  Variable *ExtractedElement = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Lower extractelement operations where the element is 32 bits
    // wide with pshufd.
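    // For example, extracting element 2 could look like (sketch):
    //   pshufd t, src, 2   ; lane 0 of t = lane 2 of src
    //   movd dest, t       ; (movss instead for f32 elements)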
    // TODO(wala): SSE4.1 has extractps and pextrd
    //
    // ALIGNHACK: Force vector operands to registers in instructions that
    // require aligned memory operands until support for stack alignment
    // is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
    Operand *T = NULL;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
      T = makeReg(Ty);
      _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
    } else {
      // TODO(wala): If SourceVectOperand is in memory, express it as
      // mem32 so that the call to legalizeToVar() is made unnecessary.
      // _movd and _movss only take mem32 memory operands.
      T = legalizeToVar(SourceVectOperand);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElement, T);
    } else { // InVectorElementTy == IceType_f32
      // TODO: _mov should be able to be used here.
      _movss(ExtractedElement, T);
    }
#undef ALIGN_HACK
  } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
    Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
    _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    // TODO(wala): SSE4.1 has pextrb.
    //
    // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty, Context.getNode());
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectOperand));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElement, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
    lowerCast(Cast);
    ExtractedElement = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElement);
}

void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = NULL;

    // ALIGNHACK: Without support for stack alignment, both operands to
    // cmpps need to be forced into registers. Once support for stack
    // alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
    switch (Condition) {
    default: {
      InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
      assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
      T = makeReg(Src0->getType());
      _movp(T, Src0);
      _cmpps(T, LEGAL_HACK(Src1), Predicate);
    } break;
    case InstFcmp::False:
      T = makeVectorOfZeros(Src0->getType());
      break;
    case InstFcmp::One: {
      // Check both unequal and ordered.
      T = makeReg(Src0->getType());
      Variable *T2 = makeReg(Src0->getType());
      Src1 = LEGAL_HACK(Src1);
      _movp(T, Src0);
      _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);
      _movp(T2, Src0);
      _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);
      _pand(T, T2);
    } break;
    case InstFcmp::Ueq: {
      // Check both equal or unordered.
      T = makeReg(Src0->getType());
      Variable *T2 = makeReg(Src0->getType());
      Src1 = LEGAL_HACK(Src1);
      _movp(T, Src0);
      _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);
      _movp(T2, Src0);
      _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);
      _por(T, T2);
    } break;
    case InstFcmp::True:
      T = makeVectorOfMinusOnes(IceType_v4i32);
      break;
    }
#undef LEGAL_HACK

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapScalarOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    Context.insert(InstFakeUse::create(Func, Dest));
    Constant *NonDefault =
        Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
    _mov(Dest, NonDefault);
    Context.insert(Label);
  }
}

void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
      Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    // SSE2 only has signed comparison operations. Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
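    // E.g., for i32 lanes: (a <u b) == ((a ^ 0x80000000) <s (b ^ 0x80000000)).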
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1);
      _pxor(T1, HighOrderBits);
      Src0 = T0;
      Src1 = T1;
    }

    // TODO: ALIGNHACK: Both operands to compare instructions need to be
    // in registers until stack alignment support is implemented. Once
    // there is support for stack alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      _movp(T, Src0);
      _pcmpeq(T, LEGAL_HACK(Src1));
    } break;
    case InstIcmp::Ne: {
      _movp(T, Src0);
      _pcmpeq(T, LEGAL_HACK(Src1));
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      _movp(T, Src0);
      _pcmpgt(T, LEGAL_HACK(Src1));
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1 > Src0)
      _movp(T, Src1);
      _pcmpgt(T, LEGAL_HACK(Src0));
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      _movp(T, Src1);
      _pcmpgt(T, LEGAL_HACK(Src0));
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0 > Src1)
      _movp(T, Src0);
      _pcmpgt(T, LEGAL_HACK(Src1));
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }
#undef LEGAL_HACK

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
  // a physical register. (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch. This
  // is possible when the compare dest and the branch source operands are the
  // same, and are their only uses. TODO: implement this optimization for i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      Operand *Src0New =
          legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
      _cmp(Src0New, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      NextBr->setDeleted();
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
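    // For Eq/Ne, the result is known as soon as either half differs
    // (sketch): dest = (Eq ? 0 : 1); cmp lo halves; jne L;
    // cmp hi halves; jne L; dest = (Eq ? 1 : 0); L: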
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}

void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectOperand = Inst->getSrc(0);
  Operand *ElementToInsert = Inst->getSrc(1);
  ConstantInteger *ElementIndex =
      llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();

  Type Ty = SourceVectOperand->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded =
        Func->makeVariable(InVectorElementTy, Context.getNode());
    InstCast *Cast =
        InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
    lowerCast(Cast);
    ElementToInsert = Expanded;
  }

  if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Lower insertelement with 32-bit wide elements using shufps.
    // TODO(wala): SSE4.1 has pinsrd and insertps.
    Variable *Element = NULL;
    if (InVectorElementTy == IceType_f32) {
      // Element will be in an XMM register since it is floating point.
      Element = legalizeToVar(ElementToInsert);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
      Element = makeReg(Ty);
      _movd(Element, T);
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from (the original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // Element[0] is being inserted into SourceVectOperand. Indices are
    // ordered from left to right.
    //
    // insertelement into index 0 (result is stored in Element):
    //   Element := Element[0, 0] SourceVectOperand[0, 1]
    //   Element := Element[0, 3] SourceVectOperand[2, 3]
    //
    // insertelement into index 1 (result is stored in Element):
    //   Element := Element[0, 0] SourceVectOperand[0, 0]
    //   Element := Element[3, 0] SourceVectOperand[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectOperand
    //   Element := Element[0, 0] T[0, 3]
    //   T := T[0, 1] Element[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectOperand
    //   Element := Element[0, 0] T[0, 2]
    //   T := T[0, 1] Element[3, 0]
    const unsigned char Mask1[4] = {64, 0, 192, 128};
    const unsigned char Mask2[4] = {236, 227, 196, 52};
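    // Each mask byte is a shufps immediate: bits [1:0] and [3:2] select
    // doublewords from the destination operand, bits [5:4] and [7:6] from
    // the source. E.g., Mask1[0] == 64 == 0b01000000 encodes
    // "Element := Element[0, 0] SourceVectOperand[0, 1]" above.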

    Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
    Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);

    // ALIGNHACK: Force vector operands to registers in instructions that
    // require aligned memory operands until support for stack alignment
    // is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
    if (Index < 2) {
      SourceVectOperand = ALIGN_HACK(SourceVectOperand);
      _shufps(Element, SourceVectOperand, Mask1Constant);
      _shufps(Element, SourceVectOperand, Mask2Constant);
      _movp(Inst->getDest(), Element);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectOperand);
      _shufps(Element, T, Mask1Constant);
      _shufps(T, Element, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
#undef ALIGN_HACK
  } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
    Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectOperand);
    _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
    _movp(Inst->getDest(), T);
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    // TODO(wala): SSE4.1 has pinsrb.
    //
    // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty, Context.getNode());
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectOperand));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToVar(ElementToInsert), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}

void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
      return;
    }
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
      Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = Instr->getArg(0);
    Operand *Expected = Instr->getArg(1);
    Operand *Desired = Instr->getArg(2);
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    // TODO(jvoung): If we peek ahead a few instructions and see how
    // DestPrev is used (typically via another compare and branch),
    // we may be able to optimize. If the result truly is used by a
    // compare + branch, and the comparison is for equality, then we can
    // optimize out the later compare, and fuse with the later branch.
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstX8632Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant
    // byte size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which
        // can make 16-byte operations lock free (when used with the LOCK
        // prefix). However, that's not supported in 32-bit mode, so just
        // return 0 even for large sizes.
2686 Result = Ctx->getConstantZero(IceType_i32);
2687 break;
2688 case 1:
2689 case 2:
2690 case 4:
2691 case 8:
2692 Result = Ctx->getConstantInt(IceType_i32, 1);
2693 break;
2694 }
2695 _mov(Dest, Result);
2696 return;
2697 }
2698 // The PNaCl ABI requires the byte size to be a compile-time constant.
2699 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2700 return;
2701 }
2702 case Intrinsics::AtomicLoad: {
2703 // We require the memory address to be naturally aligned.
2704 // Given that is the case, then normal loads are atomic.
2705 if (!Intrinsics::VerifyMemoryOrder(
2706 llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) {
2707 Func->setError("Unexpected memory ordering for AtomicLoad");
2708 return;
2709 }
2710 Variable *Dest = Instr->getDest();
2711 if (Dest->getType() == IceType_i64) {
2712 // Follow what GCC does and use a movq instead of what lowerLoad()
2713 // normally does (split the load into two).
2714 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2715 // can't happen anyway, since this is x86-32 and integer arithmetic only
2716 // happens on 32-bit quantities.
2717 Variable *T = makeReg(IceType_f64);
2718 OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
2719 _movq(T, Addr);
2720 // Then cast the bits back out of the XMM register to the i64 Dest.
2721 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2722 lowerCast(Cast);
2723 // Make sure that the atomic load isn't elided.
2724 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2725 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2726 return;
2727 }
2728 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2729 lowerLoad(Load);
2730 // Make sure the atomic load isn't elided.
2731 Context.insert(InstFakeUse::create(Func, Dest));
2732 return;
2733 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002734 case Intrinsics::AtomicRMW:
Jan Voung5cd240d2014-06-25 10:36:46 -07002735 if (!Intrinsics::VerifyMemoryOrder(
2736 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
2737 Func->setError("Unexpected memory ordering for AtomicRMW");
2738 return;
2739 }
2740 lowerAtomicRMW(Instr->getDest(),
2741 static_cast<uint32_t>(llvm::cast<ConstantInteger>(
2742 Instr->getArg(0))->getValue()),
2743 Instr->getArg(1), Instr->getArg(2));
2744 return;
2745 case Intrinsics::AtomicStore: {
2746 if (!Intrinsics::VerifyMemoryOrder(
2747 llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) {
2748 Func->setError("Unexpected memory ordering for AtomicStore");
2749 return;
2750 }
2751 // We require the memory address to be naturally aligned.
2752 // Given that is the case, then normal stores are atomic.
2753 // Add a fence after the store to make it visible.
2754 Operand *Value = Instr->getArg(0);
2755 Operand *Ptr = Instr->getArg(1);
2756 if (Value->getType() == IceType_i64) {
2757 // Use a movq instead of what lowerStore() normally does
2758 // (split the store into two), following what GCC does.
2759 // Cast the bits from int -> to an xmm register first.
2760 Variable *T = makeReg(IceType_f64);
2761 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2762 lowerCast(Cast);
2763 // Then store XMM w/ a movq.
2764 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
2765 _storeq(T, Addr);
2766 _mfence();
2767 return;
2768 }
2769 InstStore *Store = InstStore::create(Func, Value, Ptr);
2770 lowerStore(Store);
2771 _mfence();
2772 return;
2773 }
Jan Voung7fa813b2014-07-18 13:01:08 -07002774 case Intrinsics::Bswap: {
2775 Variable *Dest = Instr->getDest();
2776 Operand *Val = Instr->getArg(0);
2777 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2778 // argument must be a register. Use rotate left for 16-bit bswap.
    if (Val->getType() == IceType_i64) {
      Variable *T_Lo = legalizeToVar(loOperand(Val));
      Variable *T_Hi = legalizeToVar(hiOperand(Val));
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if (Val->getType() == IceType_i32) {
      Variable *T = legalizeToVar(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
      Val = legalize(Val);
      Constant *Eight = Ctx->getConstantInt(IceType_i16, 8);
      Variable *T = NULL;
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
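    // __popcountsi2 and __popcountdi2 are the standard libgcc/compiler-rt
    // population count helpers for 32- and 64-bit values, respectively.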
    InstCall *Call = makeHelperCall(Val->getType() == IceType_i64 ?
        "__popcountdi2" : "__popcountsi2", Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zeroing instruction is dead and gets optimized out.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = NULL;
    if (Val->getType() == IceType_i64) {
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = NULL;
    if (Val->getType() == IceType_i64) {
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall("longjmp", NULL, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall("memcpy", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall("memmove", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because "push" only works for a specific operand size.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall("memset", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
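    // NaCl keeps the thread pointer at %gs:0, so reading it is a single
    // segment-relative load: mov <dest>, gs:[0].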
    Constant *Zero = Ctx->getConstantZero(IceType_i32);
    Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL,
                                           0, OperandX8632Mem::SegReg_GS);
    Variable *Dest = Instr->getDest();
    Variable *T = NULL;
    _mov(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _mov(esp, Instr->getArg(0));
    return;
  }
  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
                                     Operand *Expected, Operand *Desired) {
  if (Expected->getType() == IceType_i64) {
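    // cmpxchg8b hard-codes its register operands: edx:eax holds the
    // expected value (and receives the old memory value), and ecx:ebx
    // holds the desired value.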
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from FormMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
  _mov(T_eax, Expected);
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
  Variable *DesiredReg = legalizeToVar(Desired);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  _mov(DestPrev, T_eax);
}

void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                 Operand *Ptr, Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = NULL;
  LowerBinOp Op_Hi = NULL;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true; the flag is
      // otherwise only used for asserting.
      NeedsCmpxchg = true;
      Op_Lo = &TargetX8632::_add;
      Op_Hi = &TargetX8632::_adc;
      break;
    }
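    // For 32-bit and narrower types, a single locked xadd suffices: it
    // atomically adds T into [Addr] and returns the old memory value in T.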
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = NULL;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      Op_Lo = &TargetX8632::_sub;
      Op_Hi = &TargetX8632::_sbb;
      break;
    }
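    // For 32-bit and narrower types, implement the subtraction as an
    // atomic add of the negated value: after "neg T", the locked xadd
    // leaves the old memory value in T.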
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = NULL;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_or;
    Op_Hi = &TargetX8632::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_and;
    Op_Hi = &TargetX8632::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_xor;
    Op_Hi = &TargetX8632::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = NULL;
      Op_Hi = NULL;
      break;
    }
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    Variable *T = NULL;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}

void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  // mov eax, [ptr]
  // mov edx, [ptr + 4]
  // .LABEL:
  // mov ebx, eax
  // <Op_Lo> ebx, <desired_adj_lo>
  // mov ecx, edx
  // <Op_Hi> ecx, <desired_adj_hi>
  // lock cmpxchg8b [ptr]
  // jne .LABEL
  // mov <dest_lo>, eax
  // mov <dest_hi>, edx
  //
  // For 32-bit:
  // mov eax, [ptr]
  // .LABEL:
  // mov <reg>, eax
  // <Op> <reg>, <desired_adj>
  // lock cmpxchg [ptr], <reg>
  // jne .LABEL
  // mov <dest>, eax
  //
  // If Op_{Lo,Hi} are NULL, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    Variable *T_edx = makeReg(IceType_i32, Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Reg_eax);
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
    if (!IsXchg8b) {
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(InstX8632Br::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base is also reused in the loop.
    Context.insert(InstFakeUse::create(Func, Addr->getBase()));
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than eax, so don't use
  // _mov(T == NULL, T_eax), since that would prefer to put T in eax.
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(InstX8632Br::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base is also reused in the loop.
  Context.insert(InstFakeUse::create(Func, Addr->getBase()));
  _mov(Dest, T_eax);
}

// Lowers count {trailing, leading} zeros intrinsic.
//
// We could do constant folding here, but that should have
// been done by the front-end/middle-end optimizations.
void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
                                  Operand *FirstVal, Operand *SecondVal) {
  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
  // Then the instructions will handle the Val == 0 case much more simply
  // and won't require conversion from bit position to number of zeros.
  //
  // Otherwise:
  // bsr IF_NOT_ZERO, Val
  // mov T_DEST, 63
  // cmovne T_DEST, IF_NOT_ZERO
  // xor T_DEST, 31
  // mov DEST, T_DEST
  //
  // NOTE: T_DEST must be a register because cmov requires its dest to be a
  // register. Also, bsf and bsr require their dest to be a register.
  //
  // The xor DEST, 31 converts a bit position to # of leading zeroes.
  // E.g., for 000... 00001100, bsr will say that the most significant bit
  // set is at position 3, while the number of leading zeros is 28. Xor is
  // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
  //
  // Similar for 64-bit, but start w/ speculating that the upper 32 bits
  // are all zero, and compute the result for that case (checking the lower
  // 32 bits). Then actually compute the result for the upper bits and
  // cmov in the result from the lower computation if the earlier speculation
  // was correct.
  //
  // Cttz is similar, but uses bsf instead, doesn't require the xor
  // bit position conversion, and the speculation is reversed.
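  // For the 64-bit Ctlz case, the emitted sequence is roughly (an
  // illustrative sketch; register names are arbitrary):
  //   bsr T, Val.lo          ; speculative result, assuming Val.hi == 0
  //   mov T_Dest, 63
  //   cmovne T_Dest, T
  //   xor T_Dest, 31         ; clz of the lower half
  //   add T_Dest, 32         ; total clz when Val.hi == 0
  //   bsr T_Dest2, Val.hi
  //   xor T_Dest2, 31        ; clz of the upper half
  //   test Val.hi, Val.hi
  //   cmove T_Dest2, T_Dest  ; upper half was zero: use speculated result
  //   mov Dest.lo, T_Dest2
  //   mov Dest.hi, 0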
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
  if (Cttz) {
    _bsf(T, FirstValRM);
  } else {
    _bsr(T, FirstValRM);
  }
  Variable *T_Dest = makeReg(IceType_i32);
  Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);
  Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31);
  if (Cttz) {
    _mov(T_Dest, ThirtyTwo);
  } else {
    Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63);
    _mov(T_Dest, SixtyThree);
  }
  _cmov(T_Dest, T, InstX8632::Br_ne);
  if (!Cttz) {
    _xor(T_Dest, ThirtyOne);
  }
  if (Ty == IceType_i32) {
    _mov(Dest, T_Dest);
    return;
  }
  _add(T_Dest, ThirtyTwo);
  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  // Will be using "test" on this, so we need a registerized variable.
  Variable *SecondVar = legalizeToVar(SecondVal);
  Variable *T_Dest2 = makeReg(IceType_i32);
  if (Cttz) {
    _bsf(T_Dest2, SecondVar);
  } else {
    _bsr(T_Dest2, SecondVar);
    _xor(T_Dest2, ThirtyOne);
  }
  _test(SecondVar, SecondVar);
  _cmov(T_Dest2, T_Dest, InstX8632::Br_e);
  _mov(DestLo, T_Dest2);
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}

namespace {

bool isAdd(const Inst *Inst) {
  if (const InstArithmetic *Arith =
          llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
    return (Arith->getOp() == InstArithmetic::Add);
  }
  return false;
}

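// Incrementally folds assignment chains, adds, and small multiplies into
// the Base/Index/Shift components of an x86 addressing mode. For example
// (illustrative), given
//   t1 = a * 4; t2 = base + t1; t3 = load [t2]
// the address computation can fold into the single memory operand
// [base + a*4] (Index = a, Shift = 2).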
void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
                       int32_t &Offset) {
  (void)Offset; // TODO: pattern-match for non-zero offsets.
  if (Base == NULL)
    return;
  // If the Base has more than one use or is live across multiple
  // blocks, then don't go further. Alternatively (?), never consider
  // a transformation that would change a variable that is currently
  // *not* live across basic block boundaries into one that *is*.
  if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
    return;

  while (true) {
    // Base is Base=Var ==>
    //   set Base=Var
    const Inst *BaseInst = Base->getDefinition();
    Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL;
    Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
    // TODO: Helper function for all instances of assignment
    // transitivity.
    if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
        // TODO: ensure BaseVariable0 stays single-BB
        true) {
      Base = BaseVariable0;
      continue;
    }

    // Index is Index=Var ==>
    //   set Index=Var

    // Index==NULL && Base is Base=Var1+Var2 ==>
    //   set Base=Var1, Index=Var2, Shift=0
    Operand *BaseOperand1 =
        BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
    Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
    if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
        // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
        true) {
      Base = BaseVariable0;
      Index = BaseVariable1;
      Shift = 0; // should already have been 0
      continue;
    }

    // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)
    const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
    if (const InstArithmetic *ArithInst =
            llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
      Operand *IndexOperand0 = ArithInst->getSrc(0);
      Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
      Operand *IndexOperand1 = ArithInst->getSrc(1);
      ConstantInteger *IndexConstant1 =
          llvm::dyn_cast<ConstantInteger>(IndexOperand1);
      if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
          IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
        uint64_t Mult = IndexConstant1->getValue();
        uint32_t LogMult;
        switch (Mult) {
        case 1:
          LogMult = 0;
          break;
        case 2:
          LogMult = 1;
          break;
        case 4:
          LogMult = 2;
          break;
        case 8:
          LogMult = 3;
          break;
        default:
          LogMult = 4;
          break;
        }
        if (Shift + LogMult <= 3) {
          Index = IndexVariable0;
          Shift += LogMult;
          continue;
        }
      }
    }

    // Index is Index=Var<<Const && Const+Shift<=3 ==>
    //   Index=Var, Shift+=Const

    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)

    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
    //   swap(Index,Base)
    // Similar for Base=Const*Var and Base=Var<<Const

    // Base is Base=Var+Const ==>
    //   set Base=Var, Offset+=Const

    // Base is Base=Const+Var ==>
    //   set Base=Var, Offset+=Const

    // Base is Base=Var-Const ==>
    //   set Base=Var, Offset-=Const

    // Index is Index=Var+Const ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Const+Var ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Var-Const ==>
    //   set Index=Var, Offset-=(Const<<Shift)

    // TODO: consider overflow issues with respect to Offset.
    // TODO: handle symbolic constants.
    break;
  }
}

} // anonymous namespace

void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand. Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);

  // Fuse this load with a subsequent Arithmetic instruction in the
  // following situations:
  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
  //
  // TODO: Clean up and test thoroughly.
  // (E.g., if there is an mfence-all, make sure the load ends up on the
  // same side of the fence).
  //
  // TODO: Why limit to Arithmetic instructions? This could probably be
  // applied to almost any instruction type. Look at all source operands
  // in the following instruction, and if there is one instance of the
  // load instruction's dest variable, and that instruction ends that
  // variable's live range, then make the substitution. Deal with
  // commutativity optimization in the arithmetic instruction lowering.
  InstArithmetic *NewArith = NULL;
  if (InstArithmetic *Arith =
          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
    Variable *DestLoad = Inst->getDest();
    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
        DestLoad != Src0Arith) {
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(0), Src0);
    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(1), Src0);
    }
    if (NewArith) {
      Arith->setDeleted();
      Context.advanceNext();
      lowerArithmetic(NewArith);
      return;
    }
  }

  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}

void TargetX8632::doAddressOptLoad() {
  Inst *Inst = *Context.getCur();
  Variable *Dest = Inst->getDest();
  Operand *Addr = Inst->getSrc(0);
  Variable *Index = NULL;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  // Vanilla ICE load instructions should not use the segment registers,
  // and computeAddressOpt only works at the level of Variables and Constants,
  // not other OperandX8632Mem, so there should be no mention of segment
  // registers there either.
  const OperandX8632Mem::SegmentRegisters SegmentReg =
      OperandX8632Mem::DefaultSegment;
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  computeAddressOpt(Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
    Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
                                   Shift, SegmentReg);
    Inst->setDeleted();
    Context.insert(InstLoad::create(Func, Dest, Addr));
  }
}

void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (Src0->getType() == IceType_f32 ||
               Src0->getType() == IceType_f64) {
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      Reg = legalizeToVar(Src0, false, Reg_xmm0);
    } else {
      _mov(Reg, Src0, Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}

void TargetX8632::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(Dest->getType())) {
    // a=d?b:c ==> d=sext(d); a=(b&d)|(c&~d)
    // TODO(wala): SSE4.1 has blendvps and pblendvb. SSE4.1 also has
    // blendps and pblendw for constant condition operands.
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      _movp(T, Condition);
    }
    // ALIGNHACK: Until stack alignment support is implemented, the
    // bitwise vector instructions need to have both operands in
    // registers. Once there is support for stack alignment, LEGAL_HACK
    // can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
    _movp(T2, T);
    _pand(T, LEGAL_HACK(SrcT));
    _pandn(T2, LEGAL_HACK(SrcF));
    _por(T, T2);
    _movp(Dest, T);
#undef LEGAL_HACK

    return;
  }

  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Operand *ConditionRMI = legalize(Condition);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(ConditionRMI, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(ConditionRMI, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}

void TargetX8632::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());

  if (NewAddr->getType() == IceType_i64) {
    Value = legalize(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
    _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
    _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm, true);
    _store(Value, NewAddr);
  }
}

void TargetX8632::doAddressOptStore() {
  InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
  Operand *Data = Inst->getData();
  Operand *Addr = Inst->getAddr();
  Variable *Index = NULL;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  // Vanilla ICE store instructions should not use the segment registers,
  // and computeAddressOpt only works at the level of Variables and Constants,
  // not other OperandX8632Mem, so there should be no mention of segment
  // registers there either.
  const OperandX8632Mem::SegmentRegisters SegmentReg =
      OperandX8632Mem::DefaultSegment;
  computeAddressOpt(Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
    Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
                                   Shift, SegmentReg);
    Inst->setDeleted();
    Context.insert(InstStore::create(Func, Data, Addr));
  }
}

void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  // OK, we'll be slightly less naive by forcing Src into a physical
  // register if there are 2 or more uses.
  if (NumCases >= 2)
    Src0 = legalizeToVar(Src0, true);
  else
    Src0 = legalize(Src0, Legal_All, true);
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
    _cmp(Src0, Value);
    _br(InstX8632Br::Br_e, Inst->getLabel(I));
  }

  _br(Inst->getLabelDefault());
}

// The following pattern occurs often in lowered C and C++ code:
//
// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
// %cmp.ext = sext <n x i1> %cmp to <n x ty>
//
// We can eliminate the sext operation by copying the result of pcmpeqd,
// pcmpgtd, or cmpps (which produce sign extended results) to the result
// of the sext operation.
void
TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
  if (InstCast *NextCast =
          llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
    if (NextCast->getCastKind() == InstCast::Sext &&
        NextCast->getSrc(0) == SignExtendedResult) {
      _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
      // Skip over the instruction.
      NextCast->setDeleted();
      Context.advanceNext();
    }
  }
}

void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  const SizeT MaxSrcs = 0;
  Variable *Dest = NULL;
  InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
  lowerCall(Call);
}

// There is no support for loading or emitting vector constants, so the
// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
// etc. are initialized with register operations.
//
// TODO(wala): Add limited support for vector constants so that
// complex initialization in registers is unnecessary.

Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  // Insert a FakeDef, since otherwise the live range of Reg might
  // be overestimated.
  Context.insert(InstFakeDef::create(Func, Reg));
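  // pxor of a register with itself is the idiomatic x86 zeroing sequence;
  // it also breaks any dependence on the register's previous contents.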
  _pxor(Reg, Reg);
  return Reg;
}

Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
  Variable *MinusOnes = makeReg(Ty, RegNum);
  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
  Context.insert(InstFakeDef::create(Func, MinusOnes));
  _pcmpeq(MinusOnes, MinusOnes);
  return MinusOnes;
}

Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
  Variable *Dest = makeVectorOfZeros(Ty, RegNum);
  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
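  // Subtracting all-ones from zero gives 0 - (-1) == 1 in each lane.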
  _psub(Dest, MinusOne);
  return Dest;
}

Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
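  // E.g., for v4i32 this produces <0x80000000, 0x80000000, 0x80000000,
  // 0x80000000>: only the sign bit of each lane is set.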
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8 bit integers.
    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask =
        Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}

OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
                                                           Variable *Slot,
                                                           uint32_t Offset) {
  // Ensure that Loc is a stack slot.
  assert(Slot->getWeight() == RegWeight::Zero);
  assert(Slot->getRegNum() == Variable::NoRegister);
  // Compute the location of Loc in memory.
  // TODO(wala,stichnot): lea should not be required. The address of
  // the stack slot is known at compile time (although not until after
  // addProlog()).
  const Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
  return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    _movp(Reg, Src);
  } else {
    _mov(Reg, Src);
  }
  return Reg;
}

Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds. (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    if (Base != RegBase || Index != RegIndex) {
      From = OperandX8632Mem::create(
          Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
          Mem->getShift(), Mem->getSegmentRegister());
    }

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero. Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      //
      // If in the future the implementation is changed to lower undef
      // values to uninitialized registers, a FakeDef will be needed:
      // Context.insert(InstFakeDef::create(Func, Reg));
      // This is in order to ensure that the live range of Reg is not
      // overestimated. If the constant being lowered is a 64 bit value,
      // then the result should be split and the lo and hi components will
      // need to go in uninitialized registers.
      if (isVectorType(From->getType()))
        return makeVectorOfZeros(From->getType());
      From = Ctx->getConstantZero(From->getType());
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(From->getType()));
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm))
      // Immediate specifically not allowed.
      NeedsReg = true;
    // TODO(stichnot): LEAHACK: remove Legal_Reloc once a proper
    // emitter is used.
    if (!(Allowed & Legal_Reloc) && llvm::isa<ConstantRelocatable>(From))
      // Relocatable specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) &&
        (From->getType() == IceType_f32 || From->getType() == IceType_f64))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister =
        (Var->hasReg() || Var->getWeight() == RegWeight::Inf);
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = copyToReg(From, RegNum);
      if (RegNum == Variable::NoRegister) {
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}

// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
                                     int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
}

OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
  OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
  // It may be the case that address mode optimization already creates
  // an OperandX8632Mem, so in that case it wouldn't need another level
  // of transformation.
  if (!Mem) {
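    // A raw pointer operand is either a single Variable (used as the base
    // register) or a single Constant (used as the displacement).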
    Variable *Base = llvm::dyn_cast<Variable>(Operand);
    Constant *Offset = llvm::dyn_cast<Constant>(Operand);
    assert(Base || Offset);
    Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
  }
  return llvm::cast<OperandX8632Mem>(legalize(Mem));
}

Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type, Context.getNode());
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetX8632::postLower() {
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  RegSetMask RegInclude = RegSet_All;
  RegSetMask RegExclude = RegSet_None;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
  // Make one pass to black-list pre-colored registers. TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        Variable *Var = Src->getVar(J);
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}

template <> void ConstantInteger::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  Str << getValue();
}

template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  // It would be better to prefix with ".L$" instead of "L$", but
  // llvm-mc doesn't parse "dword ptr [.L$foo]".
  Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
}

template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
}

TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
    : TargetGlobalInitLowering(Ctx) {}

namespace {
char hexdigit(unsigned X) { return X < 10 ? '0' + X : 'A' + X - 10; }
}

void TargetGlobalInitX8632::lower(const IceString &Name, SizeT Align,
                                  bool IsInternal, bool IsConst,
                                  bool IsZeroInitializer, SizeT Size,
                                  const char *Data, bool DisableTranslation) {
  if (Ctx->isVerbose()) {
    // TODO: Consider moving the dump output into the driver to be
    // reused for all targets.
    Ostream &Str = Ctx->getStrDump();
    Str << "@" << Name << " = " << (IsInternal ? "internal" : "external");
    Str << (IsConst ? " constant" : " global");
    Str << " [" << Size << " x i8] ";
    if (IsZeroInitializer) {
      Str << "zeroinitializer";
    } else {
      Str << "c\"";
      // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep
      // the strings in the same format as the .ll file for practical
      // diffing.
      for (uint64_t i = 0; i < Size; ++i) {
        unsigned char C = Data[i];
        if (isprint(C) && C != '\\' && C != '"')
          Str << C;
        else
          Str << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
      }
      Str << "\"";
    }
    Str << ", align " << Align << "\n";
  }

  if (DisableTranslation)
    return;

  Ostream &Str = Ctx->getStrEmit();
  // constant:
  // .section .rodata,"a",@progbits
  // .align ALIGN
  // .byte ...
  // .size NAME, SIZE

  // non-constant:
  // .data
  // .align ALIGN
  // .byte ...
  // .size NAME, SIZE

  // zeroinitializer (constant):
  // (.section or .data as above)
  // .align ALIGN
  // .zero SIZE
  // .size NAME, SIZE

  // zeroinitializer (non-constant):
  // (.section or .data as above)
  // .comm NAME, SIZE, ALIGN
  // .local NAME

  IceString MangledName = Ctx->mangleName(Name);
  // Start a new section.
  if (IsConst) {
    Str << "\t.section\t.rodata,\"a\",@progbits\n";
  } else {
    Str << "\t.type\t" << MangledName << ",@object\n";
    Str << "\t.data\n";
  }
  if (IsZeroInitializer) {
    if (IsConst) {
      Str << "\t.align\t" << Align << "\n";
      Str << MangledName << ":\n";
      Str << "\t.zero\t" << Size << "\n";
      Str << "\t.size\t" << MangledName << ", " << Size << "\n";
    } else {
      // TODO(stichnot): Put the appropriate non-constant
      // zeroinitializers in a .bss section to reduce object size.
      Str << "\t.comm\t" << MangledName << ", " << Size << ", " << Align
          << "\n";
    }
  } else {
    Str << "\t.align\t" << Align << "\n";
    Str << MangledName << ":\n";
    for (SizeT i = 0; i < Size; ++i) {
      Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
    }
    Str << "\t.size\t" << MangledName << ", " << Size << "\n";
  }
  Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
      << "\n";
}

} // end of namespace Ice