//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction. It also implements
// TargetX8632::postLower() which does the simplest possible
// register allocation for the "fast" target.
//
//===----------------------------------------------------------------------===//

#include "IceDefs.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceInstX8632.h"
#include "IceOperand.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLoweringX8632.h"
#include "llvm/Support/CommandLine.h"

namespace Ice {

namespace {

// The following table summarizes the logic for lowering the fcmp
// instruction. There is one table entry for each of the 16 conditions.
//
// The first four columns describe the case when the operands are
// floating point scalar values. A comment in lowerFcmp() describes the
// lowering template. In the most general case, there is a compare
// followed by two conditional branches, because some fcmp conditions
// don't map to a single x86 conditional branch. However, in many cases
// it is possible to swap the operands in the comparison and have a
// single conditional branch. Since it's quite tedious to validate the
// table by hand, good execution tests are helpful.
//
// The last two columns describe the case when the operands are vectors
// of floating point values. For most fcmp conditions, there is a clear
// mapping to a single x86 cmpps instruction variant. Some fcmp
// conditions require special code to handle and these are marked in the
// table with a Cmpps_Invalid predicate.
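//
// As an illustrative sketch (not a row quoted from the table): a scalar
// "fcmp ogt a, b" can use a single branch with the operands in program
// order,
//   ucomiss a, b
//   ja <true>   ; "above" is taken only if neither operand is NaN
// while "fcmp olt a, b" achieves the same single-branch form by swapping
// the operands (the SwapScalarOperands column) and reusing "ja".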
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapScalarOperands;
  InstX8632::BrCond C1, C2;
  bool SwapVectorOperands;
  InstX8632Cmpps::CmppsCond Predicate;
} TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
  { \
    dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \
  } \
  ,
    FCMPX8632_TABLE
#undef X
};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// x86 conditional branch instruction.

const struct TableIcmp32_ {
  InstX8632::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
  { InstX8632Br::C_32 } \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
// conditional branches are needed. For the other conditions, three separate
// conditional branches are needed.
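//
// A hedged sketch of what "icmp ult" on i64 looks like under this scheme
// (the actual C1/C2/C3 conditions come from the table rows):
//   cmp a.hi, b.hi
//   jb <true>    ; C1: high halves decide the result
//   ja <false>   ; C2: high halves decide the result the other way
//   cmp a.lo, b.lo
//   jb <true>    ; C3: high halves are equal, low halves decide (unsigned)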
const struct TableIcmp64_ {
  InstX8632::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
  { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

const struct TableTypeX8632Attributes_ {
  Type InVectorElementType;
} TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width) \
  { elementty } \
  ,
    ICETYPEX8632_TABLE
#undef X
};
const size_t TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);

// Return the type which the elements of the vector have in the X86
// representation of the vector.
Type getInVectorElementType(Type Ty) {
  assert(isVectorType(Ty));
  size_t Index = static_cast<size_t>(Ty);
  (void)Index;
  assert(Index < TableTypeX8632AttributesSize);
  return TableTypeX8632Attributes[Ty].InVectorElementType;
}
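
// For example, boolean vectors are held widened in xmm registers, so under
// the ICETYPEX8632_TABLE layout getInVectorElementType(IceType_v4i1) is
// expected to return IceType_i32 (and v8i1 -> i16, v16i1 -> i8).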

// The maximum number of arguments to pass in XMM registers.
const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte.
const unsigned X86_CHAR_BIT = 8;

// Instruction set options.
namespace cl = ::llvm::cl;
cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
    "mattr", cl::desc("X86 target attributes"),
    cl::init(TargetX8632::SSE2),
    cl::values(
        clEnumValN(TargetX8632::SSE2, "sse2",
                   "Enable SSE2 instructions (default)"),
        clEnumValN(TargetX8632::SSE4_1, "sse4.1",
                   "Enable SSE 4.1 instructions"), clEnumValEnd));

// Return a string representation of the type that is suitable for use
// in an identifier.
IceString typeIdentString(const Type Ty) {
  IceString Str;
  llvm::raw_string_ostream BaseOS(Str);
  if (isVectorType(Ty)) {
    BaseOS << "v" << typeNumElements(Ty) << typeElementType(Ty);
  } else {
    BaseOS << Ty;
  }
  return BaseOS.str();
}
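
// For example, typeIdentString(IceType_v4i32) yields "v4i32", which the
// vector lowerings below use to form runtime helper names such as
// "Sz_shl_v4i32".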

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables
// could get out of sync if enum values are reordered or if entries
// are added or deleted. This dummy function uses static_assert to
// ensure everything is kept in sync.
void __attribute__((unused)) xMacroIntegrityCheck() {
  // Validate the enum values in FCMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
      FCMPX8632_TABLE
#undef X
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
    ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
  static const int _table2_##val = _tmp_##val; \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTFCMP_TABLE;
#undef X
  }

  // Validate the enum values in ICMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
      ICMPX8632_TABLE
#undef X
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
    ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64) \
  static const int _table2_##val = _tmp_##val; \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTICMP_TABLE;
#undef X
  }

  // Validate the enum values in ICETYPEX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
      ICETYPEX8632_TABLE
#undef X
      _num
    };
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, elts, elty, str) \
  static const int _table1_##tag = tag;
    ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width) \
  static const int _table2_##tag = _tmp_##tag; \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
#define X(tag, size, align, elts, elty, str) \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPE_TABLE;
#undef X
  }
}

} // end of anonymous namespace

TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), InstructionSet(CLInstructionSet),
      IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0),
      NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(Reg_NUM)) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Reg_NUM);
  llvm::SmallBitVector VectorRegisters(Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Reg_NUM);
  ScratchRegs.resize(Reg_NUM);
#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  IntegerRegisters[val] = isInt; \
  IntegerRegistersI8[val] = isI8; \
  FloatRegisters[val] = isFP; \
  VectorRegisters[val] = isFP; \
  ScratchRegs[val] = scratch;
  REGX8632_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetX8632::translateO2() {
  GlobalContext *Context = Func->getContext();

  // Lower Phi instructions.
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  Func->dump("After Phi lowering");

  // Address mode optimization.
  Timer T_doAddressOpt;
  Func->doAddressOpt();
  T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");

  // Argument lowering.
  Timer T_argLowering;
  Func->doArgLowering();
  T_argLowering.printElapsedUs(Context, "lowerArguments()");

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Timer T_renumber1;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber1.printElapsedUs(Context, "renumberInstructions()");

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Timer T_liveness1;
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  T_liveness1.printElapsedUs(Context, "liveness()");
  Func->dump("After x86 address mode opt");

  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Timer T_renumber2;
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  T_renumber2.printElapsedUs(Context, "renumberInstructions()");
  Timer T_liveness2;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  T_liveness2.printElapsedUs(Context, "liveness()");
  // Validate the live range computations. Do it outside the timing
  // code. TODO: Put this under a flag.
  bool ValidLiveness = Func->validateLiveness();
  assert(ValidLiveness);
  (void)ValidLiveness; // used only in assert()
  ComputedLiveRanges = true;
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Timer T_regAlloc;
  regAlloc();
  if (Func->hasError())
    return;
  T_regAlloc.printElapsedUs(Context, "regAlloc()");
  Func->dump("After linear scan regalloc");

  // Stack frame mapping.
  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  Func->dump("After stack frame mapping");
}

void TargetX8632::translateOm1() {
  GlobalContext *Context = Func->getContext();
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  Func->dump("After Phi lowering");

  Timer T_argLowering;
  Func->doArgLowering();
  T_argLowering.printElapsedUs(Context, "lowerArguments()");

  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");
  Func->dump("After initial x8632 codegen");

  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  Func->dump("After stack frame mapping");
}

IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  name,
  REGX8632_TABLE
#undef X
};

Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
  assert(RegNum < PhysicalRegisters.size());
  Variable *Reg = PhysicalRegisters[RegNum];
  if (Reg == NULL) {
    CfgNode *Node = NULL; // NULL means multi-block lifetime
    Reg = Func->makeVariable(IceType_i32, Node);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[RegNum] = Reg;
  }
  return Reg;
}

IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < Reg_NUM);
  static IceString RegNames8[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  name8,
    REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  name16,
    REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}

void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
  Ostream &Str = Ctx->getStrEmit();
  assert(Var->getLocalUseNode() == NULL ||
         Var->getLocalUseNode() == Func->getCurrentNode());
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  Str << InstX8632::getWidthString(Var->getType());
  Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  if (Offset) {
    if (Offset > 0)
      Str << "+";
    Str << Offset;
  }
  Str << "]";
}
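
// So a register-allocated i32 variable emits simply as, e.g., "eax", while
// a stack-allocated one emits as something like "dword ptr [ebp+8]" (the
// width prefix comes from InstX8632::getWidthString()).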

void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
       ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register. Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    IceString Name = "home_reg:" + Arg->getName();
    const CfgNode *DefNode = NULL;
    Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg(Func);
    Arg->setIsArg(Func, false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}
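
// Sketch: for "f(i32 %a, <4 x i32> %b, <4 x i32> %c)", %b and %c are
// replaced in the argument list by home-register variables pinned to xmm0
// and xmm1, and the entry node gets assignments copying each home register
// back into the original variable; %a is left for finishArgumentLowering().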

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
  }
}
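
// E.g., an i64 stack argument whose basic frame offset is K ends up with
// its lo half at [FramePtr+K] and its hi half at [FramePtr+K+4], each
// counted as a separate 4-byte in-arg.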

Type TargetX8632::stackSlotType() { return IceType_i32; }

void TargetX8632::addProlog(CfgNode *Node) {
  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames. If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks. E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A
  // or B.
  const bool SimpleCoalescing = true;
  size_t InArgsSizeBytes = 0;
  size_t RetIpSizeBytes = 4;
  size_t PreservedRegsSizeBytes = 0;
  LocalsSizeBytes = 0;
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
  // LocalsSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a
    // register) or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg())
          continue;
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > LocalsSizeBytes)
          LocalsSizeBytes = LocalsSize[NodeIndex];
      }
    } else {
      LocalsSizeBytes += Increment;
    }
  }
  LocalsSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      PreservedRegsSizeBytes += 4;
      const bool SuppressStackAdjustment = true;
      _push(getPhysicalRegister(i), SuppressStackAdjustment);
    }
  }

  // Generate "push ebp; mov ebp, esp".
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    Variable *esp = getPhysicalRegister(Reg_esp);
    const bool SuppressStackAdjustment = true;
    _push(ebp, SuppressStackAdjustment);
    _mov(ebp, esp);
  }

  // Generate "sub esp, LocalsSizeBytes".
  if (LocalsSizeBytes)
    _sub(getPhysicalRegister(Reg_esp),
         Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += LocalsSizeBytes;

  unsigned NumXmmArgs = 0;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  size_t TotalGlobalsSize = GlobalsSize;
  GlobalsSize = 0;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = 0;
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    if (Var->getIsArg())
      continue;
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg()) {
          // TODO: Make sure Linked has already been assigned a stack
          // slot.
          Var->setStackOffset(Linked->getStackOffset());
          continue;
        }
      }
    }
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
        NextStackOffset = GlobalsSize;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset;
  this->HasComputedFrame = true;

  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
                                     << "\n"
                                     << "InArgsSizeBytes=" << InArgsSizeBytes
                                     << "\n"
                                     << "PreservedRegsSizeBytes="
                                     << PreservedRegsSizeBytes << "\n";
  }
}
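
// A sketch of the resulting ebp-based frame, higher addresses first:
//   in-args           <- ebp + PreservedRegsSizeBytes + RetIpSizeBytes
//   return address
//   preserved regs    (pushed before ebp)
//   saved ebp         <- ebp
//   globals + locals  (negative offsets from ebp, down to esp)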

void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, LocalsSizeBytes
    if (LocalsSizeBytes)
      _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}

template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";

template <typename T> void TargetX8632::emitConstantPool() const {
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (ConstantList::const_iterator I = Pool.begin(), E = Pool.end(); I != E;
       ++I) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(*I);
    typename T::PrimitiveFpType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
    Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
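
// A sketch of the emitted text for a float pool holding 1.0f as entry 0
// (the label format follows the code above):
//   .section .rodata.cst4,"aM",@progbits,4
//   .align 4
//   L$f32$0:
//   .long 0x3f800000 # float 1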

void TargetX8632::emitConstants() const {
  emitConstantPool<PoolTypeConverter<float> >();
  emitConstantPool<PoolTypeConverter<double> >();

  // No need to emit constants from the int pool since (for x86) they
  // are embedded as immediates in the instructions.
}

void TargetX8632::split64(Variable *Var) {
  switch (Var->getType()) {
  default:
    return;
  case IceType_i64:
  // TODO: Only consider F64 if we need to push each half when
  // passing as an argument to a function call. Note that each half
  // is still typed as I32.
  case IceType_f64:
    break;
  }
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == NULL);
  Lo = Func->makeVariable(IceType_i32, Context.getNode(),
                          Var->getName() + "__lo");
  Hi = Func->makeVariable(IceType_i32, Context.getNode(),
                          Var->getName() + "__hi");
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg(Func);
    Hi->setIsArg(Func);
  }
}
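
// E.g., splitting an i64 variable "a" produces i32 halves named "a__lo"
// and "a__hi", which the i64 lowering sequences below operate on.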

Operand *TargetX8632::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    uint64_t Mask = (1ull << 32) - 1;
    return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
                                   Mem->getOffset(), Mem->getIndex(),
                                   Mem->getShift(), Mem->getSegmentRegister());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}

Operand *TargetX8632::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    Constant *Offset = Mem->getOffset();
    if (Offset == NULL)
      Offset = Ctx->getConstantInt(IceType_i32, 4);
    else if (ConstantInteger *IntOffset =
                 llvm::dyn_cast<ConstantInteger>(Offset)) {
      Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
    } else if (ConstantRelocatable *SymOffset =
                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
                                   SymOffset->getName());
    }
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
                                   Mem->getIndex(), Mem->getShift(),
                                   Mem->getSegmentRegister());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
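
// For example, given the i64 constant 0x123456789ABCDEF0, loOperand()
// returns the i32 constant 0x9ABCDEF0 and hiOperand() returns 0x12345678;
// for a memory operand [ebx+8], the halves are [ebx+8] and [ebx+12]
// (little-endian).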

llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(Reg_NUM);

#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  if (scratch && (Include & RegSet_CallerSave)) \
    Registers[val] = true; \
  if (preserved && (Include & RegSet_CalleeSave)) \
    Registers[val] = true; \
  if (stackptr && (Include & RegSet_StackPointer)) \
    Registers[val] = true; \
  if (frameptr && (Include & RegSet_FramePointer)) \
    Registers[val] = true; \
  if (scratch && (Exclude & RegSet_CallerSave)) \
    Registers[val] = false; \
  if (preserved && (Exclude & RegSet_CalleeSave)) \
    Registers[val] = false; \
  if (stackptr && (Exclude & RegSet_StackPointer)) \
    Registers[val] = false; \
  if (frameptr && (Exclude & RegSet_FramePointer)) \
    Registers[val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}

void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
  IsEbpBasedFrame = true;
  // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
  // the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  _sub(esp, TotalSize);
  _mov(Dest, esp);
}
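
// Sketch: an alloca of 32 bytes therefore lowers to
//   sub esp, 32
//   mov <dest>, esp
// and switching to an ebp-based frame keeps the other stack offsets valid
// after esp moves.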

void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = NULL, *T_Hi = NULL;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      _mov(T_Lo, Src0Lo);
      _add(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _adc(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      _mov(T_Lo, Src0Lo);
      _and(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _and(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _mov(T_Lo, Src0Lo);
      _or(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _or(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _mov(T_Lo, Src0Lo);
      _xor(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _xor(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      _mov(T_Lo, Src0Lo);
      _sub(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _sbb(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
      Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
      // gcc does the following:
      // a=b*c ==>
      //   t1 = b.hi; t1 *=(imul) c.lo
      //   t2 = c.hi; t2 *=(imul) b.lo
      //   t3:eax = b.lo
      //   t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
      //   a.lo = t4.lo
      //   t4.hi += t1
      //   t4.hi += t2
      //   a.hi = t4.hi
      _mov(T_1, Src0Hi);
      _imul(T_1, Src1Lo);
      _mov(T_2, Src1Hi);
      _imul(T_2, Src0Lo);
      _mov(T_3, Src0Lo, Reg_eax);
      _mul(T_4Lo, T_3, Src1Lo);
      // The mul instruction produces two dest variables, edx:eax. We
      // create a fake definition of edx to account for this.
      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
      _mov(DestLo, T_4Lo);
      _add(T_4Hi, T_1);
      _add(T_4Hi, T_2);
      _mov(DestHi, T_4Hi);
    } break;
    case InstArithmetic::Shl: {
      // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
      // gcc does the following:
      // a=b<<c ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_3));
      _mov(T_3, T_2);
      _mov(T_2, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _mov(T_3, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
      Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
      Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(T_1, Src1Lo, Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(InstX8632Br::Br_e, Label);
      // Because of the intra-block control flow, we need to fake a use
      // of T_3 to prevent its earlier definition from being dead-code
      // eliminated in the presence of its later definition.
      Context.insert(InstFakeUse::create(Func, T_2));
      _mov(T_2, T_3);
      _sar(T_3, SignExtend);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Udiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else if (isVectorType(Dest->getType())) {
    // TODO: Trap on integer divide and integer modulo by zero.
    // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
    //
    // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
    // registers. This is a workaround of the fact that there is no
    // support for aligning stack operands. Once there is support,
    // remove LEGAL_HACK.
#define LEGAL_HACK(s) legalizeToVar((s))
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _padd(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::And: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pand(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Or: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _por(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pxor(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _psub(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      bool TypesAreValidForPmull =
          Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
      bool InstructionSetIsValidForPmull =
          Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
      if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
        Variable *T = makeReg(Dest->getType());
        _movp(T, Src0);
        _pmull(T, LEGAL_HACK(Src1));
        _movp(Dest, T);
      } else if (Dest->getType() == IceType_v4i32) {
        // Lowering sequence:
        // Note: The mask arguments have index 0 on the left.
        //
        // movups  T1, Src0
        // pshufd  T2, Src0, {1,0,3,0}
        // pshufd  T3, Src1, {1,0,3,0}
        // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
        // pmuludq T1, Src1
        // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
        // pmuludq T2, T3
        // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
        // shufps  T1, T2, {0,2,0,2}
        // pshufd  T4, T1, {0,2,1,3}
        // movups  Dest, T4

        // Mask that directs pshufd to create a vector with entries
        // Src[1, 0, 3, 0]
        const unsigned Constant1030 = 0x31;
        Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
        // Mask that directs shufps to create a vector with entries
        // Dest[0, 2], Src[0, 2]
        const unsigned Mask0202 = 0x88;
        // Mask that directs pshufd to create a vector with entries
        // Src[0, 2, 1, 3]
        const unsigned Mask0213 = 0xd8;
        Variable *T1 = makeReg(IceType_v4i32);
        Variable *T2 = makeReg(IceType_v4i32);
        Variable *T3 = makeReg(IceType_v4i32);
        Variable *T4 = makeReg(IceType_v4i32);
        _movp(T1, Src0);
        // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R
        // with Src1 after stack operand alignment support is
        // implemented.
        Variable *Src0R = LEGAL_HACK(Src0);
        Variable *Src1R = LEGAL_HACK(Src1);
        _pshufd(T2, Src0R, Mask1030);
        _pshufd(T3, Src1R, Mask1030);
        _pmuludq(T1, Src1R);
        _pmuludq(T2, T3);
        _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
        _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
        _movp(Dest, T4);
      } else {
        assert(Dest->getType() == IceType_v16i8);
        // Sz_mul_v16i8
        const IceString Helper = "Sz_mul_v16i8";
        const SizeT MaxSrcs = 2;
        InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
        Call->addArg(Src0);
        Call->addArg(Src1);
        lowerCall(Call);
      }
    } break;
    case InstArithmetic::Shl: {
      // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
      const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Lshr: {
      // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
      const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Ashr: {
      // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
      const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Udiv: {
      // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
      const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Sdiv: {
      // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
      const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Urem: {
      // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
      const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Srem: {
      // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
      const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    case InstArithmetic::Fadd: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _addps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fsub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _subps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fmul: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _mulps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fdiv: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _divps(T, LEGAL_HACK(Src1));
      _movp(Dest, T);
    } break;
    case InstArithmetic::Frem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
    } break;
    }
#undef LEGAL_HACK
Matt Wala8d1072e2014-07-11 15:43:51 -07001404 } else { // Dest->getType() is non-i64 scalar
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001405 Variable *T_edx = NULL;
1406 Variable *T = NULL;
1407 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001408 case InstArithmetic::_num:
1409 llvm_unreachable("Unknown arithmetic operator");
1410 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001411 case InstArithmetic::Add:
1412 _mov(T, Src0);
1413 _add(T, Src1);
1414 _mov(Dest, T);
1415 break;
1416 case InstArithmetic::And:
1417 _mov(T, Src0);
1418 _and(T, Src1);
1419 _mov(Dest, T);
1420 break;
1421 case InstArithmetic::Or:
1422 _mov(T, Src0);
1423 _or(T, Src1);
1424 _mov(Dest, T);
1425 break;
1426 case InstArithmetic::Xor:
1427 _mov(T, Src0);
1428 _xor(T, Src1);
1429 _mov(Dest, T);
1430 break;
1431 case InstArithmetic::Sub:
1432 _mov(T, Src0);
1433 _sub(T, Src1);
1434 _mov(Dest, T);
1435 break;
1436 case InstArithmetic::Mul:
1437 // TODO: Optimize for llvm::isa<Constant>(Src1)
1438 // TODO: Strength-reduce multiplications by a constant,
1439 // particularly -1 and powers of 2. Advanced: use lea to
1440 // multiply by 3, 5, 9.
1441 //
1442 // The 8-bit version of imul only allows the form "imul r/m8"
1443 // where T must be in eax.
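      // As an illustrative sketch of the lea idea (not implemented
      // here), a multiply by 5 could lower to a single instruction:
      //   lea dst, [src+4*src]  ; dst = src * 5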
      if (Dest->getType() == IceType_i8)
        _mov(T, Src0, Reg_eax);
      else
        _mov(T, Src0);
      _imul(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Shl:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shl(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _shr(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _mov(T, Src0);
      if (!llvm::isa<Constant>(Src1))
        Src1 = legalizeToVar(Src1, false, Reg_ecx);
      _sar(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      // div and idiv are among the few arithmetic operators that do
      // not allow an immediate operand.
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
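      // For the i32 case below, the emitted sequence is effectively:
      //   mov eax, src0
      //   mov edx, 0
      //   div src1   ; quotient in eax, remainder in edx
      //   mov dest, eax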
      if (Dest->getType() == IceType_i8) {
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantZero(IceType_i8);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T, Src1, T_ah);
        _mov(Dest, T);
      } else {
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(T, Src0, Reg_eax);
        _mov(T_edx, Zero, Reg_edx);
        _div(T, Src1, T_edx);
        _mov(Dest, T);
      }
      break;
    case InstArithmetic::Sdiv:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
      break;
    case InstArithmetic::Urem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      if (Dest->getType() == IceType_i8) {
        Variable *T_ah = NULL;
        Constant *Zero = Ctx->getConstantZero(IceType_i8);
        _mov(T, Src0, Reg_eax);
        _mov(T_ah, Zero, Reg_ah);
        _div(T_ah, Src1, T);
        _mov(Dest, T_ah);
      } else {
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(T_edx, Zero, Reg_edx);
        _mov(T, Src0, Reg_eax);
        _div(T_edx, Src1, T);
        _mov(Dest, T_edx);
      }
      break;
    case InstArithmetic::Srem:
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      T_edx = makeReg(IceType_i32, Reg_edx);
      _mov(T, Src0, Reg_eax);
      _cdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    case InstArithmetic::Fadd:
      _mov(T, Src0);
      _addss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fsub:
      _mov(T, Src0);
      _subss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fmul:
      _mov(T, Src0);
      _mulss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Fdiv:
      _mov(T, Src0);
      _divss(T, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Frem: {
      const SizeT MaxSrcs = 2;
      Type Ty = Dest->getType();
      InstCall *Call =
          makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      return lowerCall(Call);
    } break;
    }
  }
}

void TargetX8632::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = NULL, *T_Hi = NULL;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    const bool AllowOverlap = true;
    // RI is either a physical register or an immediate.
    Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
    if (isVectorType(Dest->getType()))
      _movp(Dest, RI);
    else
      _mov(Dest, RI);
  }
}

void TargetX8632::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
  } else {
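    // The conditional branch lowers to a compare against zero, roughly:
    //   cmp cond, 0
    //   jne target_true
    //   jmp target_false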
    Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
    Constant *Zero = Ctx->getConstantZero(IceType_i32);
    _cmp(Src0, Zero);
    _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
  }
}

void TargetX8632::lowerCall(const InstCall *Instr) {
  // Classify each argument operand according to the location where the
  // argument is passed.
  OperandList XmmArgs;
  OperandList StackArgs;
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
    }
  }
  // For stack arguments, generate a sequence of push instructions,
  // pushing right to left, keeping track of stack offsets in case a
  // push involves a stack operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: Consolidate the stack adjustment for function calls by
  // reserving enough space for the arguments only once.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
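  // For example, a call f(i64 a, float b) pushes b first, then hi(a),
  // then lo(a), leaving lo(a) at the lowest address as the calling
  // convention expects.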
  for (OperandList::reverse_iterator I = StackArgs.rbegin(),
                                     E = StackArgs.rend(); I != E; ++I) {
    Operand *Arg = legalize(*I);
    if (Arg->getType() == IceType_i64) {
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
      // If the Arg turns out to be a memory operand, more than one push
      // instruction is required. This ends up being somewhat clumsy in
      // the current IR, so we use a workaround. Force the operand into
      // a (xmm) register, and then push the register. An xmm register
      // push is actually not possible in x86, but the Push instruction
      // emitter handles this by decrementing the stack pointer and
      // directly writing the xmm register value.
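      // A sketch of that expansion (assuming a 16-byte vector operand):
      //   sub esp, 16
      //   movups [esp], xmm<N>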
      _push(legalize(Arg, Legal_Reg));
    } else {
      // Otherwise PNaCl requires parameter types to be at least 32 bits.
      assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToVar(XmmArgs[i], false, Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = NULL;
  Variable *ReturnRegHi = NULL;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), Reg_xmm0);
      break;
    }
  }
  // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
  // a proper emitter is used.
  Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
  Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(ReturnReg, false);
      DestHi->setPreferredRegister(ReturnRegHi, false);
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      Dest->setPreferredRegister(ReturnReg, false);
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
  }
}

void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that
    // the optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // width = width(elty) - 1; dest = (src << width) >> width
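        // For example, for v8i16 the shift amount is 15: an i1 value
        // of 1 in the low bit becomes 0xFFFF after the psll/psra pair,
        // and 0 stays 0.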
        SizeT ShiftAmount =
            X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
        Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = Inst->getSrc(0);
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = NULL;
      _mov(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers. SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word. This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM);
      _movp(Dest, T);
    } else if (Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               Src0->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64. Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      (void)DestType;
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _movq(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      Src0 = legalize(Src0);
      assert(Src0->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   FakeDef(s.f64)
      //   lo(s.f64) = t_lo.i32
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0));
      // Technically, the Spill is defined after the _store happens, but
      // SpillLo is considered a "use" of Spill so define Spill before it
      // is used.
      Context.insert(InstFakeDef::create(Func, Spill));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0));
      _store(T_Hi, SpillHi);
      _movq(Dest, Spill);
    } break;
    case IceType_v8i1: {
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType(),
                                               Context.getNode());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType(),
                                               Context.getNode());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      _movp(Dest, legalizeToVar(Src0));
    } break;
    }
    break;
  }
  }
}

void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger *ElementIndex =
      llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  bool CanUsePextr =
      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Use pextrb, pextrw, or pextrd.
    Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
    Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
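    // For example, extracting element 2 of a v4i32 emits roughly:
    //   pshufd t, srcvec, 2   ; bring element 2 down to slot 0
    //   movd dest, t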
Matt Wala49889232014-07-18 12:45:09 -07002184 //
2185 // ALIGNHACK: Force vector operands to registers in instructions that
2186 // require aligned memory operands until support for stack alignment
2187 // is implemented.
2188#define ALIGN_HACK(Vect) legalizeToVar((Vect))
Matt Walae3777672014-07-31 09:06:17 -07002189 Operand *SourceVectRM =
2190 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Matt Walacfe51462014-07-25 15:57:56 -07002191 Variable *T = NULL;
Matt Wala49889232014-07-18 12:45:09 -07002192 if (Index) {
2193 // The shuffle only needs to occur if the element to be extracted
2194 // is not at the lowest index.
2195 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2196 T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002197 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask);
Matt Wala49889232014-07-18 12:45:09 -07002198 } else {
Matt Walae3777672014-07-31 09:06:17 -07002199 T = ALIGN_HACK(SourceVectRM);
Matt Wala49889232014-07-18 12:45:09 -07002200 }
2201
2202 if (InVectorElementTy == IceType_i32) {
Matt Walae3777672014-07-31 09:06:17 -07002203 _movd(ExtractedElementR, T);
    } else { // InVectorElementTy == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination. _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
#undef ALIGN_HACK
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty, Context.getNode());
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}

void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = NULL;

    if (Condition == InstFcmp::True) {
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

      // ALIGNHACK: Without support for stack alignment, both operands to
      // cmpps need to be forced into registers. Once support for stack
      // alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
      switch (Condition) {
      default: {
        InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
        assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, LEGAL_HACK(Src1RM), Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        Src1RM = LEGAL_HACK(Src1RM);
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check for either equal or unordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        Src1RM = LEGAL_HACK(Src1RM);
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);
        _por(T, T2);
      } break;
      }
#undef LEGAL_HACK
    }

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //                      /* but swap b,c order if SwapScalarOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
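  // As an illustrative sketch, "a = fcmp oeq b, c" needs two branches
  // (after ucomiss, equality is ZF==1 while PF==0), so it compiles to
  // roughly:
  //   ucomiss b, c
  //   mov a, 0
  //   jne L
  //   jp L
  //   mov a, 1
  // L: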
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    Context.insert(InstFakeUse::create(Func, Dest));
    Constant *NonDefault =
        Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
    _mov(Dest, NonDefault);
    Context.insert(Label);
  }
}

void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
      Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations. Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
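    // For example, for 32-bit elements:
    //   x <u y  ==  (x ^ 0x80000000) <s (y ^ 0x80000000)
    // since xor-ing the sign bit maps the unsigned range onto the
    // signed range while preserving order.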
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    // TODO: ALIGNHACK: Both operands to compare instructions need to be
    // in registers until stack alignment support is implemented. Once
    // there is support for stack alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      _movp(T, Src0RM);
      _pcmpeq(T, LEGAL_HACK(Src1RM));
    } break;
    case InstIcmp::Ne: {
      _movp(T, Src0RM);
      _pcmpeq(T, LEGAL_HACK(Src1RM));
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      _movp(T, Src0RM);
      _pcmpgt(T, LEGAL_HACK(Src1RM));
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1RM > Src0RM)
      _movp(T, Src1RM);
      _pcmpgt(T, LEGAL_HACK(Src0RM));
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      _movp(T, Src1RM);
      _pcmpgt(T, LEGAL_HACK(Src0RM));
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0RM > Src1RM)
      _movp(T, Src0RM);
      _pcmpgt(T, LEGAL_HACK(Src1RM));
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }
#undef LEGAL_HACK

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
  // a physical register. (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch. This
  // is possible when the branch's condition operand is the compare's dest,
  // and the branch is its only use. TODO: implement this optimization for
  // i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      Operand *Src0New = legalize(
          Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg, true);
      _cmp(Src0New, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      NextBr->setDeleted();
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}

void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger *ElementIndex =
      llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded =
        Func->makeVariable(InVectorElementTy, Context.getNode());
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
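    // For example, inserting into element 2 of a v4f32 emits roughly:
    //   insertps t, elem, 0x20   ; 0x20 == 2 << 4 selects dest slot 2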
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      _insertps(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = NULL;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToVar(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // ElementR[0] is being inserted into SourceVectRM. Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    const unsigned char Mask1[3] = {0, 192, 128};
    const unsigned char Mask2[3] = {227, 196, 52};
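    // Decoding one case as a worked example: for Index == 1, Mask2 is
    // 227 == 0b11100011, which selects elements [3, 0] of the
    // destination for the two low result doublewords and elements
    // [2, 3] of the source for the two high ones, turning
    // ElementR == { E, E, S[0], S[0] } into { S[0], E, S[2], S[3] }.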

    Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
    Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);

    // ALIGNHACK: Force vector operands to registers in instructions that
    // require aligned memory operands until support for stack alignment
    // is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
    if (Index == 1) {
      SourceVectRM = ALIGN_HACK(SourceVectRM);
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
#undef ALIGN_HACK
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty, Context.getNode());
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToVar(ElementToInsertNotLegalized), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}

void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
      return;
    }
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
      Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = Instr->getArg(0);
    Operand *Expected = Instr->getArg(1);
    Operand *Desired = Instr->getArg(2);
Jan Voungc820ddf2014-07-29 14:38:51 -07002691 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
2692 return;
Jan Vounga3a01a22014-07-14 10:32:41 -07002693 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
Jan Voung5cd240d2014-06-25 10:36:46 -07002694 return;
Jan Vounga3a01a22014-07-14 10:32:41 -07002695 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002696 case Intrinsics::AtomicFence:
Jan Voung5cd240d2014-06-25 10:36:46 -07002697 if (!Intrinsics::VerifyMemoryOrder(
2698 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
2699 Func->setError("Unexpected memory ordering for AtomicFence");
2700 return;
2701 }
2702 _mfence();
2703 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002704 case Intrinsics::AtomicFenceAll:
Jan Voung5cd240d2014-06-25 10:36:46 -07002705 // NOTE: FenceAll should prevent any load/store from being moved
2706 // across the fence (both atomic and non-atomic). The InstX8632Mfence
2707 // instruction is currently marked coarsely as "HasSideEffects".
2708 _mfence();
2709 return;
2710 case Intrinsics::AtomicIsLockFree: {
2711 // X86 is always lock free for 8/16/32/64 bit accesses.
2712 // TODO(jvoung): Since the result is constant when given a constant
2713 // byte size, this opens up DCE opportunities.
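  // For example, a call with a constant byte size of 4 lowers to just
  // "mov Dest, 1" below, which a later pass could then constant-propagate.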
2714 Operand *ByteSize = Instr->getArg(0);
2715 Variable *Dest = Instr->getDest();
2716 if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) {
2717 Constant *Result;
2718 switch (CI->getValue()) {
2719 default:
 2720 // Some x86-64 processors support the cmpxchg16b instruction, which
2721 // can make 16-byte operations lock free (when used with the LOCK
2722 // prefix). However, that's not supported in 32-bit mode, so just
2723 // return 0 even for large sizes.
2724 Result = Ctx->getConstantZero(IceType_i32);
2725 break;
2726 case 1:
2727 case 2:
2728 case 4:
2729 case 8:
2730 Result = Ctx->getConstantInt(IceType_i32, 1);
2731 break;
2732 }
2733 _mov(Dest, Result);
2734 return;
2735 }
2736 // The PNaCl ABI requires the byte size to be a compile-time constant.
2737 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2738 return;
2739 }
2740 case Intrinsics::AtomicLoad: {
2741 // We require the memory address to be naturally aligned.
 2742 // Given that, normal loads are atomic.
2743 if (!Intrinsics::VerifyMemoryOrder(
2744 llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) {
2745 Func->setError("Unexpected memory ordering for AtomicLoad");
2746 return;
2747 }
2748 Variable *Dest = Instr->getDest();
2749 if (Dest->getType() == IceType_i64) {
2750 // Follow what GCC does and use a movq instead of what lowerLoad()
2751 // normally does (split the load into two).
2752 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2753 // can't happen anyway, since this is x86-32 and integer arithmetic only
2754 // happens on 32-bit quantities.
2755 Variable *T = makeReg(IceType_f64);
2756 OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
2757 _movq(T, Addr);
2758 // Then cast the bits back out of the XMM register to the i64 Dest.
2759 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2760 lowerCast(Cast);
Jan Vounge6e497d2014-07-30 10:06:03 -07002761 // Make sure that the atomic load isn't elided when unused.
Jan Voung5cd240d2014-06-25 10:36:46 -07002762 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2763 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2764 return;
2765 }
2766 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2767 lowerLoad(Load);
Jan Vounge6e497d2014-07-30 10:06:03 -07002768 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2769 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2770 // insert the FakeUse on the last-inserted instruction's dest.
Jim Stichnoth6e992142014-07-30 14:45:20 -07002771 Context.insert(
2772 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
Jan Voung5cd240d2014-06-25 10:36:46 -07002773 return;
2774 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002775 case Intrinsics::AtomicRMW:
Jan Voung5cd240d2014-06-25 10:36:46 -07002776 if (!Intrinsics::VerifyMemoryOrder(
2777 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
2778 Func->setError("Unexpected memory ordering for AtomicRMW");
2779 return;
2780 }
2781 lowerAtomicRMW(Instr->getDest(),
2782 static_cast<uint32_t>(llvm::cast<ConstantInteger>(
2783 Instr->getArg(0))->getValue()),
2784 Instr->getArg(1), Instr->getArg(2));
2785 return;
2786 case Intrinsics::AtomicStore: {
2787 if (!Intrinsics::VerifyMemoryOrder(
2788 llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) {
2789 Func->setError("Unexpected memory ordering for AtomicStore");
2790 return;
2791 }
2792 // We require the memory address to be naturally aligned.
 2793 // Given that, normal stores are atomic.
2794 // Add a fence after the store to make it visible.
2795 Operand *Value = Instr->getArg(0);
2796 Operand *Ptr = Instr->getArg(1);
2797 if (Value->getType() == IceType_i64) {
2798 // Use a movq instead of what lowerStore() normally does
2799 // (split the store into two), following what GCC does.
2800 // Cast the bits from int -> to an xmm register first.
2801 Variable *T = makeReg(IceType_f64);
2802 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2803 lowerCast(Cast);
2804 // Then store XMM w/ a movq.
2805 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
2806 _storeq(T, Addr);
2807 _mfence();
2808 return;
2809 }
2810 InstStore *Store = InstStore::create(Func, Value, Ptr);
2811 lowerStore(Store);
2812 _mfence();
2813 return;
2814 }
Jan Voung7fa813b2014-07-18 13:01:08 -07002815 case Intrinsics::Bswap: {
2816 Variable *Dest = Instr->getDest();
2817 Operand *Val = Instr->getArg(0);
2818 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2819 // argument must be a register. Use rotate left for 16-bit bswap.
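  // (E.g., rotating the 16-bit value 0xAABB left by 8 yields 0xBBAA,
  // the byte-swapped result.)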
2820 if (Val->getType() == IceType_i64) {
2821 Variable *T_Lo = legalizeToVar(loOperand(Val));
2822 Variable *T_Hi = legalizeToVar(hiOperand(Val));
2823 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2824 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2825 _bswap(T_Lo);
2826 _bswap(T_Hi);
2827 _mov(DestLo, T_Hi);
2828 _mov(DestHi, T_Lo);
2829 } else if (Val->getType() == IceType_i32) {
2830 Variable *T = legalizeToVar(Val);
2831 _bswap(T);
2832 _mov(Dest, T);
2833 } else {
2834 assert(Val->getType() == IceType_i16);
2835 Val = legalize(Val);
2836 Constant *Eight = Ctx->getConstantInt(IceType_i16, 8);
2837 Variable *T = NULL;
2838 _mov(T, Val);
2839 _rol(T, Eight);
2840 _mov(Dest, T);
2841 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002842 return;
Jan Voung7fa813b2014-07-18 13:01:08 -07002843 }
Jan Vounge4da26f2014-07-15 17:52:39 -07002844 case Intrinsics::Ctpop: {
2845 Variable *Dest = Instr->getDest();
2846 Operand *Val = Instr->getArg(0);
2847 InstCall *Call = makeHelperCall(Val->getType() == IceType_i64 ?
2848 "__popcountdi2" : "__popcountsi2", Dest, 1);
2849 Call->addArg(Val);
2850 lowerCall(Call);
2851 // The popcount helpers always return 32-bit values, while the intrinsic's
2852 // signature matches the native POPCNT instruction and fills a 64-bit reg
2853 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
2854 // the user doesn't do that in the IR. If the user does that in the IR,
 2855 // then this zeroing instruction is dead and gets optimized out.
2856 if (Val->getType() == IceType_i64) {
2857 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2858 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2859 _mov(DestHi, Zero);
2860 }
2861 return;
2862 }
2863 case Intrinsics::Ctlz: {
2864 // The "is zero undef" parameter is ignored and we always return
2865 // a well-defined value.
2866 Operand *Val = legalize(Instr->getArg(0));
2867 Operand *FirstVal;
2868 Operand *SecondVal = NULL;
2869 if (Val->getType() == IceType_i64) {
2870 FirstVal = loOperand(Val);
2871 SecondVal = hiOperand(Val);
2872 } else {
2873 FirstVal = Val;
2874 }
2875 const bool IsCttz = false;
2876 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2877 SecondVal);
2878 return;
2879 }
2880 case Intrinsics::Cttz: {
2881 // The "is zero undef" parameter is ignored and we always return
2882 // a well-defined value.
2883 Operand *Val = legalize(Instr->getArg(0));
2884 Operand *FirstVal;
2885 Operand *SecondVal = NULL;
2886 if (Val->getType() == IceType_i64) {
2887 FirstVal = hiOperand(Val);
2888 SecondVal = loOperand(Val);
2889 } else {
2890 FirstVal = Val;
2891 }
2892 const bool IsCttz = true;
2893 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2894 SecondVal);
2895 return;
2896 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002897 case Intrinsics::Longjmp: {
2898 InstCall *Call = makeHelperCall("longjmp", NULL, 2);
2899 Call->addArg(Instr->getArg(0));
2900 Call->addArg(Instr->getArg(1));
2901 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07002902 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002903 }
2904 case Intrinsics::Memcpy: {
2905 // In the future, we could potentially emit an inline memcpy/memset, etc.
2906 // for intrinsic calls w/ a known length.
2907 InstCall *Call = makeHelperCall("memcpy", NULL, 3);
2908 Call->addArg(Instr->getArg(0));
2909 Call->addArg(Instr->getArg(1));
2910 Call->addArg(Instr->getArg(2));
2911 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07002912 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002913 }
2914 case Intrinsics::Memmove: {
2915 InstCall *Call = makeHelperCall("memmove", NULL, 3);
2916 Call->addArg(Instr->getArg(0));
2917 Call->addArg(Instr->getArg(1));
2918 Call->addArg(Instr->getArg(2));
2919 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07002920 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002921 }
2922 case Intrinsics::Memset: {
2923 // The value operand needs to be extended to a stack slot size
Jan Voung7b34b592014-07-18 13:12:58 -07002924 // because "push" only works for a specific operand size.
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002925 Operand *ValOp = Instr->getArg(1);
2926 assert(ValOp->getType() == IceType_i8);
Jan Voung957c50d2014-07-21 14:05:29 -07002927 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
2928 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002929 InstCall *Call = makeHelperCall("memset", NULL, 3);
2930 Call->addArg(Instr->getArg(0));
2931 Call->addArg(ValExt);
2932 Call->addArg(Instr->getArg(2));
2933 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07002934 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002935 }
2936 case Intrinsics::NaClReadTP: {
Jan Voung5cd240d2014-06-25 10:36:46 -07002937 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002938 Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL,
2939 0, OperandX8632Mem::SegReg_GS);
2940 Variable *Dest = Instr->getDest();
2941 Variable *T = NULL;
2942 _mov(T, Src);
2943 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07002944 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002945 }
2946 case Intrinsics::Setjmp: {
2947 InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
2948 Call->addArg(Instr->getArg(0));
2949 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07002950 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002951 }
Jan Voungf37fbbe2014-07-09 16:13:13 -07002952 case Intrinsics::Sqrt: {
2953 Operand *Src = legalize(Instr->getArg(0));
2954 Variable *Dest = Instr->getDest();
2955 Variable *T = makeReg(Dest->getType());
2956 _sqrtss(T, Src);
2957 _mov(Dest, T);
2958 return;
2959 }
Jan Voung7b34b592014-07-18 13:12:58 -07002960 case Intrinsics::Stacksave: {
2961 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
2962 Variable *Dest = Instr->getDest();
2963 _mov(Dest, esp);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002964 return;
Jan Voung7b34b592014-07-18 13:12:58 -07002965 }
2966 case Intrinsics::Stackrestore: {
2967 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
2968 _mov(esp, Instr->getArg(0));
2969 return;
2970 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002971 case Intrinsics::Trap:
2972 _ud2();
Jan Voung5cd240d2014-06-25 10:36:46 -07002973 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002974 case Intrinsics::UnknownIntrinsic:
2975 Func->setError("Should not be lowering UnknownIntrinsic");
2976 return;
2977 }
2978 return;
2979}
2980
Jan Vounga3a01a22014-07-14 10:32:41 -07002981void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
2982 Operand *Expected, Operand *Desired) {
2983 if (Expected->getType() == IceType_i64) {
2984 // Reserve the pre-colored registers first, before adding any more
2985 // infinite-weight variables from FormMemoryOperand's legalization.
2986 Variable *T_edx = makeReg(IceType_i32, Reg_edx);
2987 Variable *T_eax = makeReg(IceType_i32, Reg_eax);
2988 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
2989 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
2990 _mov(T_eax, loOperand(Expected));
2991 _mov(T_edx, hiOperand(Expected));
2992 _mov(T_ebx, loOperand(Desired));
2993 _mov(T_ecx, hiOperand(Desired));
2994 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2995 const bool Locked = true;
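  // cmpxchg8b compares edx:eax against the 8 bytes at Addr; if they are
  // equal it stores ecx:ebx there, otherwise it loads the memory value
  // into edx:eax. Either way edx:eax ends up holding the original memory
  // contents, which is exactly what DestPrev must receive.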
2996 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
2997 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
2998 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
2999 _mov(DestLo, T_eax);
3000 _mov(DestHi, T_edx);
3001 return;
3002 }
3003 Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
3004 _mov(T_eax, Expected);
3005 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3006 Variable *DesiredReg = legalizeToVar(Desired);
3007 const bool Locked = true;
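  // As with cmpxchg8b above, eax holds the original memory value after
  // the lock cmpxchg whether or not the exchange happened, so it can be
  // copied straight into DestPrev.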
3008 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3009 _mov(DestPrev, T_eax);
3010}
3011
Jan Voungc820ddf2014-07-29 14:38:51 -07003012bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
3013 Operand *Expected,
3014 Operand *Desired) {
3015 if (Ctx->getOptLevel() == Opt_m1)
3016 return false;
3017 // Peek ahead a few instructions and see how Dest is used.
3018 // It's very common to have:
3019 //
3020 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3021 // [%y_phi = ...] // list of phi stores
3022 // %p = icmp eq i32 %x, %expected
3023 // br i1 %p, label %l1, label %l2
3024 //
3025 // which we can optimize into:
3026 //
3027 // %x = <cmpxchg code>
3028 // [%y_phi = ...] // list of phi stores
3029 // br eq, %l1, %l2
3030 InstList::iterator I = Context.getCur();
3031 // I is currently the InstIntrinsicCall. Peek past that.
3032 // This assumes that the atomic cmpxchg has not been lowered yet,
 3033 // so that the instructions seen in the scan from "Cur" are simple.
3034 assert(llvm::isa<InstIntrinsicCall>(*I));
3035 Inst *NextInst = Context.getNextInst(I);
3036 if (!NextInst)
3037 return false;
3038 // There might be phi assignments right before the compare+branch, since this
3039 // could be a backward branch for a loop. This placement of assignments is
3040 // determined by placePhiStores().
3041 std::vector<InstAssign *> PhiAssigns;
3042 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3043 if (PhiAssign->getDest() == Dest)
3044 return false;
3045 PhiAssigns.push_back(PhiAssign);
3046 NextInst = Context.getNextInst(I);
3047 if (!NextInst)
3048 return false;
3049 }
3050 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3051 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3052 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3053 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3054 return false;
3055 }
3056 NextInst = Context.getNextInst(I);
3057 if (!NextInst)
3058 return false;
3059 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3060 if (!NextBr->isUnconditional() &&
3061 NextCmp->getDest() == NextBr->getCondition() &&
3062 NextBr->isLastUse(NextCmp->getDest())) {
3063 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3064 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3065 // Lower the phi assignments now, before the branch (same placement
3066 // as before).
3067 InstAssign *PhiAssign = PhiAssigns[i];
3068 lowerAssign(PhiAssign);
3069 PhiAssign->setDeleted();
3070 Context.advanceNext();
3071 }
3072 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
3073 // Skip over the old compare and branch, by deleting them.
3074 NextCmp->setDeleted();
3075 NextBr->setDeleted();
3076 Context.advanceNext();
3077 Context.advanceNext();
3078 return true;
3079 }
3080 }
3081 }
3082 return false;
3083}
3084
Jan Voung5cd240d2014-06-25 10:36:46 -07003085void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3086 Operand *Ptr, Operand *Val) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003087 bool NeedsCmpxchg = false;
3088 LowerBinOp Op_Lo = NULL;
3089 LowerBinOp Op_Hi = NULL;
Jan Voung5cd240d2014-06-25 10:36:46 -07003090 switch (Operation) {
3091 default:
3092 Func->setError("Unknown AtomicRMW operation");
3093 return;
3094 case Intrinsics::AtomicAdd: {
3095 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003096 // All the fall-through paths must set this to true; it is used
 3097 // only for the assert below.
3098 NeedsCmpxchg = true;
3099 Op_Lo = &TargetX8632::_add;
3100 Op_Hi = &TargetX8632::_adc;
3101 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003102 }
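  // lock xadd atomically performs T' = [Addr]; [Addr] += T; T = T', so
  // after the instruction T holds the fetched (pre-add) value for Dest.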
3103 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3104 const bool Locked = true;
3105 Variable *T = NULL;
3106 _mov(T, Val);
3107 _xadd(Addr, T, Locked);
3108 _mov(Dest, T);
3109 return;
3110 }
3111 case Intrinsics::AtomicSub: {
3112 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003113 NeedsCmpxchg = true;
3114 Op_Lo = &TargetX8632::_sub;
3115 Op_Hi = &TargetX8632::_sbb;
3116 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003117 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003118 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3119 const bool Locked = true;
3120 Variable *T = NULL;
3121 _mov(T, Val);
3122 _neg(T);
3123 _xadd(Addr, T, Locked);
3124 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003125 return;
3126 }
3127 case Intrinsics::AtomicOr:
Jan Vounga3a01a22014-07-14 10:32:41 -07003128 // TODO(jvoung): If Dest is null or dead, then some of these
3129 // operations do not need an "exchange", but just a locked op.
3130 // That appears to be "worth" it for sub, or, and, and xor.
3131 // xadd is probably fine vs lock add for add, and xchg is fine
3132 // vs an atomic store.
3133 NeedsCmpxchg = true;
3134 Op_Lo = &TargetX8632::_or;
3135 Op_Hi = &TargetX8632::_or;
3136 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003137 case Intrinsics::AtomicAnd:
Jan Vounga3a01a22014-07-14 10:32:41 -07003138 NeedsCmpxchg = true;
3139 Op_Lo = &TargetX8632::_and;
3140 Op_Hi = &TargetX8632::_and;
3141 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003142 case Intrinsics::AtomicXor:
Jan Vounga3a01a22014-07-14 10:32:41 -07003143 NeedsCmpxchg = true;
3144 Op_Lo = &TargetX8632::_xor;
3145 Op_Hi = &TargetX8632::_xor;
3146 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003147 case Intrinsics::AtomicExchange:
Jan Vounga3a01a22014-07-14 10:32:41 -07003148 if (Dest->getType() == IceType_i64) {
3149 NeedsCmpxchg = true;
 3150 // NeedsCmpxchg, but no real Op_Lo/Op_Hi operations are needed. The values
3151 // just need to be moved to the ecx and ebx registers.
3152 Op_Lo = NULL;
3153 Op_Hi = NULL;
3154 break;
3155 }
3156 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3157 Variable *T = NULL;
3158 _mov(T, Val);
3159 _xchg(Addr, T);
3160 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003161 return;
3162 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003163 // Otherwise, we need a cmpxchg loop.
Jim Stichnoth6e992142014-07-30 14:45:20 -07003164 (void)NeedsCmpxchg;
Jan Vounga3a01a22014-07-14 10:32:41 -07003165 assert(NeedsCmpxchg);
3166 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3167}
3168
3169void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
3170 Variable *Dest, Operand *Ptr,
3171 Operand *Val) {
3172 // Expand a more complex RMW operation as a cmpxchg loop:
3173 // For 64-bit:
3174 // mov eax, [ptr]
3175 // mov edx, [ptr + 4]
3176 // .LABEL:
3177 // mov ebx, eax
3178 // <Op_Lo> ebx, <desired_adj_lo>
3179 // mov ecx, edx
3180 // <Op_Hi> ecx, <desired_adj_hi>
3181 // lock cmpxchg8b [ptr]
3182 // jne .LABEL
3183 // mov <dest_lo>, eax
 3184 // mov <dest_hi>, edx
3185 //
3186 // For 32-bit:
3187 // mov eax, [ptr]
3188 // .LABEL:
3189 // mov <reg>, eax
3190 // op <reg>, [desired_adj]
3191 // lock cmpxchg [ptr], <reg>
3192 // jne .LABEL
3193 // mov <dest>, eax
3194 //
3195 // If Op_{Lo,Hi} are NULL, then just copy the value.
3196 Val = legalize(Val);
3197 Type Ty = Val->getType();
3198 if (Ty == IceType_i64) {
3199 Variable *T_edx = makeReg(IceType_i32, Reg_edx);
3200 Variable *T_eax = makeReg(IceType_i32, Reg_eax);
3201 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
3202 _mov(T_eax, loOperand(Addr));
3203 _mov(T_edx, hiOperand(Addr));
3204 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
3205 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
3206 InstX8632Label *Label = InstX8632Label::create(Func, this);
3207 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
3208 if (!IsXchg8b) {
3209 Context.insert(Label);
3210 _mov(T_ebx, T_eax);
3211 (this->*Op_Lo)(T_ebx, loOperand(Val));
3212 _mov(T_ecx, T_edx);
3213 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3214 } else {
3215 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3216 // It just needs the Val loaded into ebx and ecx.
3217 // That can also be done before the loop.
3218 _mov(T_ebx, loOperand(Val));
3219 _mov(T_ecx, hiOperand(Val));
3220 Context.insert(Label);
3221 }
3222 const bool Locked = true;
3223 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3224 _br(InstX8632Br::Br_ne, Label);
3225 if (!IsXchg8b) {
3226 // If Val is a variable, model the extended live range of Val through
3227 // the end of the loop, since it will be re-used by the loop.
3228 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3229 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3230 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3231 Context.insert(InstFakeUse::create(Func, ValLo));
3232 Context.insert(InstFakeUse::create(Func, ValHi));
3233 }
3234 } else {
3235 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3236 Context.insert(InstFakeUse::create(Func, T_ebx));
3237 Context.insert(InstFakeUse::create(Func, T_ecx));
3238 }
3239 // The address base is also reused in the loop.
3240 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
3241 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3242 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3243 _mov(DestLo, T_eax);
3244 _mov(DestHi, T_edx);
3245 return;
3246 }
3247 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
3248 Variable *T_eax = makeReg(Ty, Reg_eax);
3249 _mov(T_eax, Addr);
3250 InstX8632Label *Label = InstX8632Label::create(Func, this);
3251 Context.insert(Label);
 3252 // We want T in a register other than eax (cmpxchg implicitly uses
 3253 // eax as the comparand), so don't use _mov(T == NULL, T_eax).
3254 Variable *T = makeReg(Ty);
3255 _mov(T, T_eax);
3256 (this->*Op_Lo)(T, Val);
3257 const bool Locked = true;
3258 _cmpxchg(Addr, T_eax, T, Locked);
3259 _br(InstX8632Br::Br_ne, Label);
3260 // If Val is a variable, model the extended live range of Val through
3261 // the end of the loop, since it will be re-used by the loop.
3262 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3263 Context.insert(InstFakeUse::create(Func, ValVar));
3264 }
3265 // The address base is also reused in the loop.
3266 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
3267 _mov(Dest, T_eax);
Jan Voung5cd240d2014-06-25 10:36:46 -07003268}
3269
Jan Vounge4da26f2014-07-15 17:52:39 -07003270// Lowers count {trailing, leading} zeros intrinsic.
3271//
3272// We could do constant folding here, but that should have
3273// been done by the front-end/middle-end optimizations.
3274void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3275 Operand *FirstVal, Operand *SecondVal) {
3276 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3277 // Then the instructions will handle the Val == 0 case much more simply
3278 // and won't require conversion from bit position to number of zeros.
3279 //
3280 // Otherwise:
3281 // bsr IF_NOT_ZERO, Val
3282 // mov T_DEST, 63
3283 // cmovne T_DEST, IF_NOT_ZERO
3284 // xor T_DEST, 31
3285 // mov DEST, T_DEST
3286 //
3287 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3288 // register. Also, bsf and bsr require their dest to be a register.
3289 //
3290 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3291 // E.g., for 000... 00001100, bsr will say that the most significant bit
3292 // set is at position 3, while the number of leading zeros is 28. Xor is
3293 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
3294 //
3295 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3296 // are all zero, and compute the result for that case (checking the lower
3297 // 32 bits). Then actually compute the result for the upper bits and
3298 // cmov in the result from the lower computation if the earlier speculation
3299 // was correct.
3300 //
 3301 // Cttz is similar, but uses bsf instead, and doesn't require the xor
3302 // bit position conversion, and the speculation is reversed.
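  // Concretely, the 64-bit ctlz path below amounts to roughly the
  // following (V_LO/V_HI are the operand halves; T/T1/T2 are illustrative
  // temporary names):
  //   bsr    T, V_LO
  //   mov    T1, 63
  //   cmovne T1, T
  //   xor    T1, 31      ; T1 = clz(V_LO), or 32 if V_LO == 0
  //   add    T1, 32      ; result if the upper half is all zeros
  //   bsr    T2, V_HI
  //   xor    T2, 31      ; T2 = clz(V_HI), meaningful when V_HI != 0
  //   test   V_HI, V_HI
  //   cmove  T2, T1      ; keep the speculated result if V_HI == 0
  //   mov    DEST_LO, T2
  //   mov    DEST_HI, 0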
3303 assert(Ty == IceType_i32 || Ty == IceType_i64);
3304 Variable *T = makeReg(IceType_i32);
Matt Wala53c5e602014-07-22 16:39:38 -07003305 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
Jan Vounge4da26f2014-07-15 17:52:39 -07003306 if (Cttz) {
Matt Wala53c5e602014-07-22 16:39:38 -07003307 _bsf(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003308 } else {
Matt Wala53c5e602014-07-22 16:39:38 -07003309 _bsr(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003310 }
3311 Variable *T_Dest = makeReg(IceType_i32);
3312 Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);
3313 Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31);
3314 if (Cttz) {
3315 _mov(T_Dest, ThirtyTwo);
3316 } else {
3317 Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63);
3318 _mov(T_Dest, SixtyThree);
3319 }
3320 _cmov(T_Dest, T, InstX8632::Br_ne);
3321 if (!Cttz) {
3322 _xor(T_Dest, ThirtyOne);
3323 }
3324 if (Ty == IceType_i32) {
3325 _mov(Dest, T_Dest);
3326 return;
3327 }
3328 _add(T_Dest, ThirtyTwo);
3329 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3330 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3331 // Will be using "test" on this, so we need a registerized variable.
3332 Variable *SecondVar = legalizeToVar(SecondVal);
3333 Variable *T_Dest2 = makeReg(IceType_i32);
3334 if (Cttz) {
3335 _bsf(T_Dest2, SecondVar);
3336 } else {
3337 _bsr(T_Dest2, SecondVar);
3338 _xor(T_Dest2, ThirtyOne);
3339 }
3340 _test(SecondVar, SecondVar);
3341 _cmov(T_Dest2, T_Dest, InstX8632::Br_e);
3342 _mov(DestLo, T_Dest2);
3343 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3344}
3345
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003346namespace {
3347
3348bool isAdd(const Inst *Inst) {
3349 if (const InstArithmetic *Arith =
3350 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3351 return (Arith->getOp() == InstArithmetic::Add);
3352 }
3353 return false;
3354}
3355
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003356void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003357 int32_t &Offset) {
3358 (void)Offset; // TODO: pattern-match for non-zero offsets.
3359 if (Base == NULL)
3360 return;
3361 // If the Base has more than one use or is live across multiple
3362 // blocks, then don't go further. Alternatively (?), never consider
3363 // a transformation that would change a variable that is currently
3364 // *not* live across basic block boundaries into one that *is*.
3365 if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
3366 return;
3367
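  // For example, given "t1 = y * 4; t2 = x + t1" with t2 used as an
  // address (and t1/t2 single-block and single-use), the rewrites below
  // produce Base = x, Index = y, Shift = 2, i.e. the operand [x + y*4].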
3368 while (true) {
3369 // Base is Base=Var ==>
3370 // set Base=Var
3371 const Inst *BaseInst = Base->getDefinition();
3372 Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL;
3373 Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
3374 // TODO: Helper function for all instances of assignment
3375 // transitivity.
3376 if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
3377 // TODO: ensure BaseVariable0 stays single-BB
3378 true) {
3379 Base = BaseVariable0;
3380 continue;
3381 }
3382
3383 // Index is Index=Var ==>
3384 // set Index=Var
3385
3386 // Index==NULL && Base is Base=Var1+Var2 ==>
3387 // set Base=Var1, Index=Var2, Shift=0
3388 Operand *BaseOperand1 =
3389 BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
3390 Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
3391 if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
3392 // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
3393 true) {
3394 Base = BaseVariable0;
3395 Index = BaseVariable1;
3396 Shift = 0; // should already have been 0
3397 continue;
3398 }
3399
3400 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3401 // Index=Var, Shift+=log2(Const)
3402 const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
3403 if (const InstArithmetic *ArithInst =
3404 llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
3405 Operand *IndexOperand0 = ArithInst->getSrc(0);
3406 Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
3407 Operand *IndexOperand1 = ArithInst->getSrc(1);
3408 ConstantInteger *IndexConstant1 =
3409 llvm::dyn_cast<ConstantInteger>(IndexOperand1);
3410 if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
3411 IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
3412 uint64_t Mult = IndexConstant1->getValue();
3413 uint32_t LogMult;
3414 switch (Mult) {
3415 case 1:
3416 LogMult = 0;
3417 break;
3418 case 2:
3419 LogMult = 1;
3420 break;
3421 case 4:
3422 LogMult = 2;
3423 break;
3424 case 8:
3425 LogMult = 3;
3426 break;
3427 default:
3428 LogMult = 4;
3429 break;
3430 }
3431 if (Shift + LogMult <= 3) {
3432 Index = IndexVariable0;
3433 Shift += LogMult;
3434 continue;
3435 }
3436 }
3437 }
3438
3439 // Index is Index=Var<<Const && Const+Shift<=3 ==>
3440 // Index=Var, Shift+=Const
3441
3442 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
3443 // Index=Var, Shift+=log2(Const)
3444
3445 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
3446 // swap(Index,Base)
3447 // Similar for Base=Const*Var and Base=Var<<Const
3448
3449 // Base is Base=Var+Const ==>
3450 // set Base=Var, Offset+=Const
3451
3452 // Base is Base=Const+Var ==>
3453 // set Base=Var, Offset+=Const
3454
3455 // Base is Base=Var-Const ==>
3456 // set Base=Var, Offset-=Const
3457
3458 // Index is Index=Var+Const ==>
3459 // set Index=Var, Offset+=(Const<<Shift)
3460
3461 // Index is Index=Const+Var ==>
3462 // set Index=Var, Offset+=(Const<<Shift)
3463
3464 // Index is Index=Var-Const ==>
3465 // set Index=Var, Offset-=(Const<<Shift)
3466
3467 // TODO: consider overflow issues with respect to Offset.
3468 // TODO: handle symbolic constants.
3469 break;
3470 }
3471}
3472
3473} // anonymous namespace
3474
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003475void TargetX8632::lowerLoad(const InstLoad *Inst) {
3476 // A Load instruction can be treated the same as an Assign
3477 // instruction, after the source operand is transformed into an
3478 // OperandX8632Mem operand. Note that the address mode
3479 // optimization already creates an OperandX8632Mem operand, so it
3480 // doesn't need another level of transformation.
3481 Type Ty = Inst->getDest()->getType();
Jan Voung5cd240d2014-06-25 10:36:46 -07003482 Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003483
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003484 // Fuse this load with a subsequent Arithmetic instruction in the
3485 // following situations:
3486 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
3487 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
3488 //
3489 // TODO: Clean up and test thoroughly.
Jan Voung5cd240d2014-06-25 10:36:46 -07003490 // (E.g., if there is an mfence-all make sure the load ends up on the
3491 // same side of the fence).
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003492 //
3493 // TODO: Why limit to Arithmetic instructions? This could probably be
3494 // applied to most any instruction type. Look at all source operands
3495 // in the following instruction, and if there is one instance of the
3496 // load instruction's dest variable, and that instruction ends that
3497 // variable's live range, then make the substitution. Deal with
3498 // commutativity optimization in the arithmetic instruction lowering.
3499 InstArithmetic *NewArith = NULL;
3500 if (InstArithmetic *Arith =
3501 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
3502 Variable *DestLoad = Inst->getDest();
3503 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
3504 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
3505 if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
3506 DestLoad != Src0Arith) {
3507 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
3508 Arith->getSrc(0), Src0);
3509 } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
3510 Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
3511 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
3512 Arith->getSrc(1), Src0);
3513 }
3514 if (NewArith) {
3515 Arith->setDeleted();
3516 Context.advanceNext();
3517 lowerArithmetic(NewArith);
3518 return;
3519 }
3520 }
3521
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003522 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
3523 lowerAssign(Assign);
3524}
3525
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003526void TargetX8632::doAddressOptLoad() {
3527 Inst *Inst = *Context.getCur();
3528 Variable *Dest = Inst->getDest();
3529 Operand *Addr = Inst->getSrc(0);
3530 Variable *Index = NULL;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003531 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003532 int32_t Offset = 0; // TODO: make Constant
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003533 // Vanilla ICE load instructions should not use the segment registers,
3534 // and computeAddressOpt only works at the level of Variables and Constants,
3535 // not other OperandX8632Mem, so there should be no mention of segment
3536 // registers there either.
3537 const OperandX8632Mem::SegmentRegisters SegmentReg =
3538 OperandX8632Mem::DefaultSegment;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003539 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3540 computeAddressOpt(Base, Index, Shift, Offset);
3541 if (Base && Addr != Base) {
3542 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
3543 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003544 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003545 Inst->setDeleted();
3546 Context.insert(InstLoad::create(Func, Dest, Addr));
3547 }
3548}
3549
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003550void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
3551 Func->setError("Phi found in regular instruction list");
3552}
3553
3554void TargetX8632::lowerRet(const InstRet *Inst) {
3555 Variable *Reg = NULL;
3556 if (Inst->hasRetValue()) {
3557 Operand *Src0 = legalize(Inst->getRetValue());
3558 if (Src0->getType() == IceType_i64) {
3559 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
3560 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
3561 Reg = eax;
3562 Context.insert(InstFakeUse::create(Func, edx));
3563 } else if (Src0->getType() == IceType_f32 ||
3564 Src0->getType() == IceType_f64) {
3565 _fld(Src0);
Matt Wala928f1292014-07-07 16:50:46 -07003566 } else if (isVectorType(Src0->getType())) {
3567 Reg = legalizeToVar(Src0, false, Reg_xmm0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003568 } else {
3569 _mov(Reg, Src0, Reg_eax);
3570 }
3571 }
3572 _ret(Reg);
3573 // Add a fake use of esp to make sure esp stays alive for the entire
3574 // function. Otherwise post-call esp adjustments get dead-code
3575 // eliminated. TODO: Are there more places where the fake use
3576 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
3577 // have a ret instruction.
3578 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
3579 Context.insert(InstFakeUse::create(Func, esp));
3580}
3581
3582void TargetX8632::lowerSelect(const InstSelect *Inst) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003583 Variable *Dest = Inst->getDest();
3584 Operand *SrcT = Inst->getTrueOperand();
3585 Operand *SrcF = Inst->getFalseOperand();
Matt Wala9cb61e22014-07-24 09:44:42 -07003586 Operand *Condition = Inst->getCondition();
3587
3588 if (isVectorType(Dest->getType())) {
Matt Wala9cb61e22014-07-24 09:44:42 -07003589 Type SrcTy = SrcT->getType();
3590 Variable *T = makeReg(SrcTy);
Matt Walae3777672014-07-31 09:06:17 -07003591 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3592 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
Matt Wala0a450512014-07-30 12:44:39 -07003593 // ALIGNHACK: Until stack alignment support is implemented, vector
3594 // instructions need to have vector operands in registers. Once
3595 // there is support for stack alignment, LEGAL_HACK can be removed.
3596#define LEGAL_HACK(Vect) legalizeToVar((Vect))
3597 if (InstructionSet >= SSE4_1) {
3598 // TODO(wala): If the condition operand is a constant, use blendps
3599 // or pblendw.
3600 //
3601 // Use blendvps or pblendvb to implement select.
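    // blendvps/pblendvb take each lane from the second source operand
    // when the corresponding sign bit of xmm0 is set, which is why the
    // condition is shifted (or sign-extended) into the sign bits first.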
3602 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3603 SrcTy == IceType_v4f32) {
Matt Walae3777672014-07-31 09:06:17 -07003604 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Matt Wala0a450512014-07-30 12:44:39 -07003605 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
Matt Walae3777672014-07-31 09:06:17 -07003606 _movp(xmm0, ConditionRM);
Matt Wala0a450512014-07-30 12:44:39 -07003607 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
Matt Walae3777672014-07-31 09:06:17 -07003608 _movp(T, SrcFRM);
3609 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003610 _movp(Dest, T);
3611 } else {
3612 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3613 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3614 : IceType_v16i8;
3615 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3616 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
Matt Walae3777672014-07-31 09:06:17 -07003617 _movp(T, SrcFRM);
3618 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003619 _movp(Dest, T);
3620 }
3621 return;
3622 }
3623 // Lower select without SSE4.1:
3624 // a=d?b:c ==>
3625 // if elementtype(d) != i1:
3626 // d=sext(d);
3627 // a=(b&d)|(c&~d);
Matt Wala9cb61e22014-07-24 09:44:42 -07003628 Variable *T2 = makeReg(SrcTy);
3629 // Sign extend the condition operand if applicable.
3630 if (SrcTy == IceType_v4f32) {
3631 // The sext operation takes only integer arguments.
3632 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
3633 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3634 _movp(T, T3);
3635 } else if (typeElementType(SrcTy) != IceType_i1) {
3636 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3637 } else {
Matt Walae3777672014-07-31 09:06:17 -07003638 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3639 _movp(T, ConditionRM);
Matt Wala9cb61e22014-07-24 09:44:42 -07003640 }
Matt Wala9cb61e22014-07-24 09:44:42 -07003641 _movp(T2, T);
Matt Walae3777672014-07-31 09:06:17 -07003642 _pand(T, LEGAL_HACK(SrcTRM));
3643 _pandn(T2, LEGAL_HACK(SrcFRM));
Matt Wala9cb61e22014-07-24 09:44:42 -07003644 _por(T, T2);
3645 _movp(Dest, T);
3646#undef LEGAL_HACK
3647
3648 return;
3649 }
3650
3651 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
3652 Operand *ConditionRMI = legalize(Condition);
Matt Wala43ff7eb2014-06-18 10:30:07 -07003653 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003654 InstX8632Label *Label = InstX8632Label::create(Func, this);
3655
3656 if (Dest->getType() == IceType_i64) {
3657 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3658 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3659 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
3660 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
Matt Wala9cb61e22014-07-24 09:44:42 -07003661 _cmp(ConditionRMI, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003662 _mov(DestLo, SrcLoRI);
3663 _mov(DestHi, SrcHiRI);
3664 _br(InstX8632Br::Br_ne, Label);
3665 Context.insert(InstFakeUse::create(Func, DestLo));
3666 Context.insert(InstFakeUse::create(Func, DestHi));
3667 Operand *SrcFLo = loOperand(SrcF);
3668 Operand *SrcFHi = hiOperand(SrcF);
3669 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
3670 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
3671 _mov(DestLo, SrcLoRI);
3672 _mov(DestHi, SrcHiRI);
3673 } else {
Matt Wala9cb61e22014-07-24 09:44:42 -07003674 _cmp(ConditionRMI, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003675 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
3676 _mov(Dest, SrcT);
3677 _br(InstX8632Br::Br_ne, Label);
3678 Context.insert(InstFakeUse::create(Func, Dest));
3679 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
3680 _mov(Dest, SrcF);
3681 }
3682
3683 Context.insert(Label);
3684}
3685
3686void TargetX8632::lowerStore(const InstStore *Inst) {
3687 Operand *Value = Inst->getData();
3688 Operand *Addr = Inst->getAddr();
Jan Voung5cd240d2014-06-25 10:36:46 -07003689 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003690
3691 if (NewAddr->getType() == IceType_i64) {
3692 Value = legalize(Value);
3693 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
3694 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
3695 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
3696 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
3697 } else {
3698 Value = legalize(Value, Legal_Reg | Legal_Imm, true);
3699 _store(Value, NewAddr);
3700 }
3701}
3702
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003703void TargetX8632::doAddressOptStore() {
3704 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
3705 Operand *Data = Inst->getData();
3706 Operand *Addr = Inst->getAddr();
3707 Variable *Index = NULL;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003708 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003709 int32_t Offset = 0; // TODO: make Constant
3710 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003711 // Vanilla ICE store instructions should not use the segment registers,
3712 // and computeAddressOpt only works at the level of Variables and Constants,
3713 // not other OperandX8632Mem, so there should be no mention of segment
3714 // registers there either.
3715 const OperandX8632Mem::SegmentRegisters SegmentReg =
3716 OperandX8632Mem::DefaultSegment;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003717 computeAddressOpt(Base, Index, Shift, Offset);
3718 if (Base && Addr != Base) {
3719 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
3720 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003721 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003722 Inst->setDeleted();
3723 Context.insert(InstStore::create(Func, Data, Addr));
3724 }
3725}
3726
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003727void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
3728 // This implements the most naive possible lowering.
 3729 // cmp a,val[0]; je label[0]; cmp a,val[1]; je label[1]; ... jmp default
3730 Operand *Src0 = Inst->getComparison();
3731 SizeT NumCases = Inst->getNumCases();
3732 // OK, we'll be slightly less naive by forcing Src into a physical
3733 // register if there are 2 or more uses.
3734 if (NumCases >= 2)
3735 Src0 = legalizeToVar(Src0, true);
3736 else
3737 Src0 = legalize(Src0, Legal_All, true);
3738 for (SizeT I = 0; I < NumCases; ++I) {
3739 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
3740 _cmp(Src0, Value);
3741 _br(InstX8632Br::Br_e, Inst->getLabel(I));
3742 }
3743
3744 _br(Inst->getLabelDefault());
3745}
3746
Matt Walace0ca8f2014-07-24 12:34:20 -07003747// The following pattern occurs often in lowered C and C++ code:
3748//
3749// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
3750// %cmp.ext = sext <n x i1> %cmp to <n x ty>
3751//
3752// We can eliminate the sext operation by copying the result of pcmpeqd,
3753// pcmpgtd, or cmpps (which produce sign extended results) to the result
3754// of the sext operation.
3755void
3756TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
3757 if (InstCast *NextCast =
3758 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
3759 if (NextCast->getCastKind() == InstCast::Sext &&
3760 NextCast->getSrc(0) == SignExtendedResult) {
3761 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
3762 // Skip over the instruction.
3763 NextCast->setDeleted();
3764 Context.advanceNext();
3765 }
3766 }
3767}
3768
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003769void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
3770 const SizeT MaxSrcs = 0;
3771 Variable *Dest = NULL;
3772 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
3773 lowerCall(Call);
3774}
3775
Matt Wala9a0168a2014-07-23 14:56:10 -07003776// There is no support for loading or emitting vector constants, so the
3777// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
3778// etc. are initialized with register operations.
3779//
3780// TODO(wala): Add limited support for vector constants so that
3781// complex initialization in registers is unnecessary.
3782
Matt Wala83b80362014-07-16 10:21:30 -07003783Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07003784 Variable *Reg = makeReg(Ty, RegNum);
3785 // Insert a FakeDef, since otherwise the live range of Reg might
3786 // be overestimated.
3787 Context.insert(InstFakeDef::create(Func, Reg));
3788 _pxor(Reg, Reg);
3789 return Reg;
3790}
3791
Matt Wala9a0168a2014-07-23 14:56:10 -07003792Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
3793 Variable *MinusOnes = makeReg(Ty, RegNum);
3794 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
3795 Context.insert(InstFakeDef::create(Func, MinusOnes));
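  // Comparing a register with itself: every lane compares equal, so
  // pcmpeq leaves all bits set, i.e. -1 in each lane.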
3796 _pcmpeq(MinusOnes, MinusOnes);
3797 return MinusOnes;
3798}
3799
Matt Wala83b80362014-07-16 10:21:30 -07003800Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
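  // 0 - (-1) == 1, so subtracting the all-minus-ones vector from the
  // zero vector leaves 1 in every lane.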
Matt Wala83b80362014-07-16 10:21:30 -07003801 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07003802 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07003803 _psub(Dest, MinusOne);
3804 return Dest;
3805}
3806
Matt Wala9a0168a2014-07-23 14:56:10 -07003807Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
3808 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
3809 Ty == IceType_v16i8);
3810 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
3811 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
3812 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
3813 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
3814 return Reg;
3815 } else {
3816 // SSE has no left shift operation for vectors of 8 bit integers.
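    // Instead, movd places the 32-bit mask in the low doubleword, and
    // pshufd with an all-zero selector broadcasts it to the other three,
    // setting the high bit of each of the 16 bytes.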
3817 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
3818 Constant *ConstantMask =
3819 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
3820 Variable *Reg = makeReg(Ty, RegNum);
3821 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
3822 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
3823 return Reg;
3824 }
3825}
3826
Matt Wala49889232014-07-18 12:45:09 -07003827OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
3828 Variable *Slot,
3829 uint32_t Offset) {
3830 // Ensure that Loc is a stack slot.
3831 assert(Slot->getWeight() == RegWeight::Zero);
3832 assert(Slot->getRegNum() == Variable::NoRegister);
3833 // Compute the location of Loc in memory.
3834 // TODO(wala,stichnot): lea should not be required. The address of
3835 // the stack slot is known at compile time (although not until after
3836 // addProlog()).
3837 const Type PointerType = IceType_i32;
3838 Variable *Loc = makeReg(PointerType);
3839 _lea(Loc, Slot);
3840 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
3841 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
3842}
3843
Matt Wala928f1292014-07-07 16:50:46 -07003844// Helper for legalize() to emit the right code to lower an operand to a
3845// register of the appropriate type.
3846Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
3847 Type Ty = Src->getType();
3848 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07003849 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07003850 _movp(Reg, Src);
3851 } else {
3852 _mov(Reg, Src);
3853 }
3854 return Reg;
3855}
3856
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003857Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
3858 bool AllowOverlap, int32_t RegNum) {
3859 // Assert that a physical register is allowed. To date, all calls
3860 // to legalize() allow a physical register. If a physical register
3861 // needs to be explicitly disallowed, then new code will need to be
3862 // written to force a spill.
3863 assert(Allowed & Legal_Reg);
3864 // If we're asking for a specific physical register, make sure we're
3865 // not allowing any other operand kinds. (This could be future
3866 // work, e.g. allow the shl shift amount to be either an immediate
3867 // or in ecx.)
3868 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
3869 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
3870 // Before doing anything with a Mem operand, we need to ensure
3871 // that the Base and Index components are in physical registers.
3872 Variable *Base = Mem->getBase();
3873 Variable *Index = Mem->getIndex();
3874 Variable *RegBase = NULL;
3875 Variable *RegIndex = NULL;
3876 if (Base) {
3877 RegBase = legalizeToVar(Base, true);
3878 }
3879 if (Index) {
3880 RegIndex = legalizeToVar(Index, true);
3881 }
3882 if (Base != RegBase || Index != RegIndex) {
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003883 From = OperandX8632Mem::create(
3884 Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
3885 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003886 }
3887
3888 if (!(Allowed & Legal_Mem)) {
Matt Wala928f1292014-07-07 16:50:46 -07003889 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003890 }
3891 return From;
3892 }
3893 if (llvm::isa<Constant>(From)) {
Matt Walad8f4a7d2014-06-18 09:55:03 -07003894 if (llvm::isa<ConstantUndef>(From)) {
3895 // Lower undefs to zero. Another option is to lower undefs to an
3896 // uninitialized register; however, using an uninitialized register
3897 // results in less predictable code.
3898 //
3899 // If in the future the implementation is changed to lower undef
3900 // values to uninitialized registers, a FakeDef will be needed:
3901 // Context.insert(InstFakeDef::create(Func, Reg));
3902 // This is in order to ensure that the live range of Reg is not
3903 // overestimated. If the constant being lowered is a 64 bit value,
3904 // then the result should be split and the lo and hi components will
3905 // need to go in uninitialized registers.
Matt Wala83b80362014-07-16 10:21:30 -07003906 if (isVectorType(From->getType()))
3907 return makeVectorOfZeros(From->getType());
3908 From = Ctx->getConstantZero(From->getType());
Matt Walad8f4a7d2014-06-18 09:55:03 -07003909 }
Matt Walaad8f7262014-07-14 17:37:37 -07003910 // There should be no constants of vector type (other than undef).
3911 assert(!isVectorType(From->getType()));
Jim Stichnothde4ca712014-06-29 08:13:48 -07003912 bool NeedsReg = false;
3913 if (!(Allowed & Legal_Imm))
3914 // Immediate specifically not allowed
3915 NeedsReg = true;
3916 // TODO(stichnot): LEAHACK: remove Legal_Reloc once a proper
3917 // emitter is used.
3918 if (!(Allowed & Legal_Reloc) && llvm::isa<ConstantRelocatable>(From))
3919 // Relocatable specifically not allowed
3920 NeedsReg = true;
3921 if (!(Allowed & Legal_Mem) &&
3922 (From->getType() == IceType_f32 || From->getType() == IceType_f64))
3923 // On x86, FP constants are lowered to mem operands.
3924 NeedsReg = true;
Jan Voung5a13f452014-06-18 10:42:02 -07003925 if (NeedsReg) {
Matt Wala928f1292014-07-07 16:50:46 -07003926 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003927 }
3928 return From;
3929 }
3930 if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
Matt Walaad8f7262014-07-14 17:37:37 -07003931 // Check if the variable is guaranteed a physical register. This
3932 // can happen either when the variable is pre-colored or when it is
3933 // assigned infinite weight.
3934 bool MustHaveRegister =
3935 (Var->hasReg() || Var->getWeight() == RegWeight::Inf);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003936 // We need a new physical register for the operand if:
Jim Stichnothca662e92014-07-10 15:32:36 -07003937 // Mem is not allowed and Var isn't guaranteed a physical
3938 // register, or
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003939 // RegNum is required and Var->getRegNum() doesn't match.
Matt Walaad8f7262014-07-14 17:37:37 -07003940 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003941 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
Matt Wala928f1292014-07-07 16:50:46 -07003942 Variable *Reg = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003943 if (RegNum == Variable::NoRegister) {
3944 Reg->setPreferredRegister(Var, AllowOverlap);
3945 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003946 From = Reg;
3947 }
3948 return From;
3949 }
3950 llvm_unreachable("Unhandled operand kind in legalize()");
3951 return From;
3952}
3953
3954// Provide a trivial wrapper to legalize() for this common usage.
3955Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
3956 int32_t RegNum) {
3957 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
3958}
3959
Jan Voung5cd240d2014-06-25 10:36:46 -07003960OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
3961 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
 3962 // Address mode optimization may have already created an
 3963 // OperandX8632Mem, in which case no further transformation is
 3964 // needed.
3965 if (!Mem) {
3966 Variable *Base = llvm::dyn_cast<Variable>(Operand);
3967 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
3968 assert(Base || Offset);
Matt Walae3777672014-07-31 09:06:17 -07003969 if (Offset) {
3970 assert(llvm::isa<ConstantInteger>(Offset) ||
3971 llvm::isa<ConstantRelocatable>(Offset));
3972 }
Jan Voung5cd240d2014-06-25 10:36:46 -07003973 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
3974 }
3975 return llvm::cast<OperandX8632Mem>(legalize(Mem));
3976}
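
// For illustration (hypothetical operand names): when a load is
// lowered with a plain pointer Variable Ptr, the call
// FormMemoryOperand(Ptr, IceType_i32) wraps Ptr as the addressing form
// "dword ptr [Ptr]", whereas an address that was already
// pattern-matched into something like "[Base + 4*Index + Ofs]" by
// address mode optimization passes through unchanged.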

Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type, Context.getNode());
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}
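
// A sketch of the intended usage (assumption: callers split 64-bit
// values themselves, as the i64 lowering sequences in this file do):
// an i64 temporary is materialized as a lo/hi pair of i32 registers,
//
//   Variable *Lo = makeReg(IceType_i32);
//   Variable *Hi = makeReg(IceType_i32);
//
// rather than as makeReg(IceType_i64), which would trip the assert.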

void TargetX8632::postLower() {
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList for every instruction.
  RegSetMask RegInclude = RegSet_All;
  RegSetMask RegExclude = RegSet_None;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
  // Make one pass to black-list pre-colored registers. TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        Variable *Var = Src->getVar(J);
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
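
// An illustrative trace of the two passes above (register names are
// examples only): suppose the lowered stream contains
//
//   mov eax, dword ptr [esp+4]   ; eax is pre-colored, so pass 1
//                                ; removes it from WhiteList
//   add T1, eax                  ; T1 is infinite-weight and uncolored
//
// Pass 2 then assigns T1 the first register still on the white list,
// say ecx, and marks ecx unavailable for later infinite-weight
// variables.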

template <> void ConstantInteger::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  Str << getValue();
}

template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  // It would be better to prefix with ".L$" instead of "L$", but
  // llvm-mc doesn't parse "dword ptr [.L$foo]".
  Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
}

template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
}
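
// For example, assuming a float constant with pool entry ID 3 and that
// Type streams as its textual name ("f32"), the emitter above produces
// the operand text "dword ptr [L$f32$3]", referencing a label in the
// constant pool that is emitted separately.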

void ConstantUndef::emit(GlobalContext *) const {
  llvm_unreachable("undef value encountered by emitter.");
}

TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
    : TargetGlobalInitLowering(Ctx) {}

namespace {
// Convert a nibble (X < 16) to its ASCII hex digit.
char hexdigit(unsigned X) { return X < 10 ? '0' + X : 'A' + X - 10; }
}

void TargetGlobalInitX8632::lower(const IceString &Name, SizeT Align,
                                  bool IsInternal, bool IsConst,
                                  bool IsZeroInitializer, SizeT Size,
                                  const char *Data, bool DisableTranslation) {
  if (Ctx->isVerbose()) {
    // TODO: Consider moving the dump output into the driver to be
    // reused for all targets.
    Ostream &Str = Ctx->getStrDump();
    Str << "@" << Name << " = " << (IsInternal ? "internal" : "external");
    Str << (IsConst ? " constant" : " global");
    Str << " [" << Size << " x i8] ";
    if (IsZeroInitializer) {
      Str << "zeroinitializer";
    } else {
      Str << "c\"";
      // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep
      // the strings in the same format as the .ll file for practical
      // diffing.
      for (uint64_t i = 0; i < Size; ++i) {
        unsigned char C = Data[i];
        if (isprint(C) && C != '\\' && C != '"')
          Str << C;
        else
          Str << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
      }
      Str << "\"";
    }
    Str << ", align " << Align << "\n";
  }

  if (DisableTranslation)
    return;

  Ostream &Str = Ctx->getStrEmit();
  // constant:
  //   .section .rodata,"a",@progbits
  //   .align ALIGN
  //   .byte ...
  //   .size NAME, SIZE

  // non-constant:
  //   .data
  //   .align ALIGN
  //   .byte ...
  //   .size NAME, SIZE

  // zeroinitializer (constant):
  //   (.section or .data as above)
  //   .align ALIGN
  //   .zero SIZE
  //   .size NAME, SIZE

  // zeroinitializer (non-constant):
  //   (.section or .data as above)
  //   .comm NAME, SIZE, ALIGN
  //   .local NAME

  IceString MangledName = Ctx->mangleName(Name);
  // Start a new section.
  if (IsConst) {
    Str << "\t.section\t.rodata,\"a\",@progbits\n";
  } else {
    Str << "\t.type\t" << MangledName << ",@object\n";
    Str << "\t.data\n";
  }
  if (IsZeroInitializer) {
    if (IsConst) {
      Str << "\t.align\t" << Align << "\n";
      Str << MangledName << ":\n";
      Str << "\t.zero\t" << Size << "\n";
      Str << "\t.size\t" << MangledName << ", " << Size << "\n";
    } else {
      // TODO(stichnot): Put the appropriate non-constant
      // zeroinitializers in a .bss section to reduce object size.
      Str << "\t.comm\t" << MangledName << ", " << Size << ", " << Align
          << "\n";
    }
  } else {
    Str << "\t.align\t" << Align << "\n";
    Str << MangledName << ":\n";
    for (SizeT i = 0; i < Size; ++i) {
      Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
    }
    Str << "\t.size\t" << MangledName << ", " << Size << "\n";
  }
  Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
      << "\n";
}
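
// A worked example (hypothetical input): an internal, non-constant
// 4-byte global "g" initialized to the bytes {1, 0, 0, 0} with 4-byte
// alignment would be emitted roughly as
//
//   .type g,@object
//   .data
//   .align 4
// g:
//   .byte 1
//   .byte 0
//   .byte 0
//   .byte 0
//   .size g, 4
//   .local g
//
// modulo whatever mangling Ctx->mangleName() applies to "g".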

} // end of namespace Ice