//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceGlobalInits.h"
#include "IceInstX8632.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersX8632.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLoweringX8632.h"
#include "IceUtils.h"

namespace Ice {

namespace {

// The following table summarizes the logic for lowering the fcmp
// instruction. There is one table entry for each of the 16 conditions.
//
// The first four columns describe the case when the operands are
// floating point scalar values. A comment in lowerFcmp() describes the
// lowering template. In the most general case, there is a compare
// followed by two conditional branches, because some fcmp conditions
// don't map to a single x86 conditional branch. However, in many cases
// it is possible to swap the operands in the comparison and have a
// single conditional branch. Since it's quite tedious to validate the
// table by hand, good execution tests are helpful.
//
// The last two columns describe the case when the operands are vectors
// of floating point values. For most fcmp conditions, there is a clear
// mapping to a single x86 cmpps instruction variant. Some fcmp
// conditions require special code to handle and these are marked in the
// table with a Cmpps_Invalid predicate.
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapScalarOperands;
  CondX86::BrCond C1, C2;
  bool SwapVectorOperands;
  CondX86::CmppsCond Predicate;
} TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
  { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
  ,
    FCMPX8632_TABLE
#undef X
  };
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
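
// Illustrative sketch (not part of the original comments): for a scalar
// fcmp whose condition has no single-branch mapping, the two-branch
// template summarized above expands to roughly
//   ucomiss <operand0>, <operand1>
//   j<C1> <label1>
//   j<C2> <label2>
// where C1 and C2 come from the table row; rows with SwapScalarOperands set
// compare the operands in reverse order so that a single branch suffices.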

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// x86 conditional branch instruction.

const struct TableIcmp32_ {
  CondX86::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
  { CondX86::C_32 } \
  ,
    ICMPX8632_TABLE
#undef X
  };
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
// conditional branches are needed. For the other conditions, three separate
// conditional branches are needed.
const struct TableIcmp64_ {
  CondX86::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
  ,
    ICMPX8632_TABLE
#undef X
  };
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
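
// Illustrative sketch (not part of the original comments): for a non-Eq/Ne
// i64 compare, the three table entries are presumably consumed roughly as
//   cmp  <hi(a)>, <hi(b)>
//   br C1 -> true target
//   br C2 -> false target
//   cmp  <lo(a)>, <lo(b)>
//   br C3 -> true target
// i.e. the high words decide most cases and the low words break the tie.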

CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

const struct TableTypeX8632Attributes_ {
  Type InVectorElementType;
} TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
  { elementty } \
  ,
    ICETYPEX8632_TABLE
#undef X
  };
const size_t TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);

// Return the type which the elements of the vector have in the X86
// representation of the vector.
Type getInVectorElementType(Type Ty) {
  assert(isVectorType(Ty));
  size_t Index = static_cast<size_t>(Ty);
  (void)Index;
  assert(Index < TableTypeX8632AttributesSize);
  return TableTypeX8632Attributes[Ty].InVectorElementType;
}
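
// For example (an illustration that depends on ICETYPEX8632_TABLE, defined
// elsewhere): the i1 mask vectors are presumably represented with widened
// elements, so getInVectorElementType(IceType_v4i1) would yield IceType_i32,
// while a type such as IceType_v16i8 keeps IceType_i8 elements.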

// The maximum number of arguments to pass in XMM registers
const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const uint32_t X86_CHAR_BIT = 8;
// Stack alignment
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size of the return address on the stack
const uint32_t X86_RET_IP_SIZE_BYTES = 4;
// The base 2 logarithm of the width in bytes of the smallest stack slot
const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
// The base 2 logarithm of the width in bytes of the largest stack slot
const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
// The number of different NOP instructions
const uint32_t X86_NUM_NOP_VARIANTS = 5;

// Value and Alignment are in bytes. Return Value adjusted to the next
// highest multiple of Alignment.
uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
  // Alignment must be a power of 2.
  assert((Alignment & (Alignment - 1)) == 0);
  return (Value + Alignment - 1) & -Alignment;
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
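
// A minimal compile-time sketch of the rounding behavior (not part of the
// original source): applyAlignmentCheck() mirrors applyAlignment() but is
// constexpr so the example values below can be verified with static_assert.
constexpr uint32_t applyAlignmentCheck(uint32_t Value, uint32_t Alignment) {
  // Same bit trick as applyAlignment(): round up to the next multiple.
  return (Value + Alignment - 1) & -Alignment;
}
static_assert(applyAlignmentCheck(0, 16) == 0, "zero stays aligned");
static_assert(applyAlignmentCheck(13, 16) == 16, "13 rounds up to 16");
static_assert(applyAlignmentCheck(32, 16) == 32, "multiples are unchanged");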

// Instruction set options
namespace cl = ::llvm::cl;
cl::opt<TargetX8632::X86InstructionSet>
CLInstructionSet("mattr", cl::desc("X86 target attributes"),
                 cl::init(TargetX8632::SSE2),
                 cl::values(clEnumValN(TargetX8632::SSE2, "sse2",
                                       "Enable SSE2 instructions (default)"),
                            clEnumValN(TargetX8632::SSE4_1, "sse4.1",
                                       "Enable SSE 4.1 instructions"),
                            clEnumValEnd));

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

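// In miniature, the cross-check pattern used by each dummy namespace below
// looks like this (illustrative pseudo-code only, not real table entries):
//   enum _tmp_enum { _tmp_Foo, _tmp_Bar, _num };     // from low-level table
//   static const int _table1_Foo = HighLevel::Foo;   // from high-level table
//   static const int _table2_Foo = _tmp_Foo;         // from low-level table
//   static_assert(_table1_Foo == _table2_Foo, "tables out of sync");
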
// Validate the enum values in FCMPX8632_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
  static const int _table2_##val = _tmp_##val; \
  static_assert( \
      _table1_##val == _table2_##val, \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str) \
  static_assert( \
      _table1_##tag == _table2_##tag, \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE;
#undef X
} // end of namespace dummy1

// Validate the enum values in ICMPX8632_TABLE.
namespace dummy2 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64) \
  static const int _table2_##val = _tmp_##val; \
  static_assert( \
      _table1_##val == _table2_##val, \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str) \
  static_assert( \
      _table1_##tag == _table2_##tag, \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE;
#undef X
} // end of namespace dummy2

// Validate the enum values in ICETYPEX8632_TABLE.
namespace dummy3 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
  ICETYPEX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, elts, elty, str) \
  static const int _table1_##tag = tag;
ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
  static const int _table2_##tag = _tmp_##tag; \
  static_assert(_table1_##tag == _table2_##tag, \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, size, align, elts, elty, str) \
  static_assert(_table1_##tag == _table2_##tag, \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE;
#undef X
} // end of namespace dummy3

} // end of anonymous namespace

TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), InstructionSet(CLInstructionSet),
      IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
      SpillAreaSizeBytes(0), NextLabelNumber(0) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
  ScratchRegs.resize(RegX8632::Reg_NUM);
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  IntegerRegisters[RegX8632::val] = isInt; \
  IntegerRegistersI8[RegX8632::val] = isI8; \
  FloatRegisters[RegX8632::val] = isFP; \
  VectorRegisters[RegX8632::val] = isFP; \
  ScratchRegs[RegX8632::val] = scratch;
  REGX8632_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetX8632::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().PhiEdgeSplit) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().PhiEdgeSplit) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetX8632::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetX8632::RegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  name,
    REGX8632_TABLE
#undef X
};

Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered
    // live upon function entry.
    if (RegNum == RegX8632::Reg_esp) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegX8632::Reg_NUM);
  static IceString RegNames8[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  name8,
    REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
          frameptr, isI8, isInt, isFP) \
  name16,
    REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
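
// For example (a sketch; the actual strings come from REGX8632_TABLE):
// querying a 32-bit GPR such as eax with IceType_i32 returns "eax", while
// IceType_i8 selects its 8-bit name (e.g. "al") and IceType_i16 its 16-bit
// name (e.g. "ax").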

void TargetX8632::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  const Type Ty = IceType_i32;
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  if (Offset)
    Str << Offset;
  Str << "(%" << getRegName(getFrameOrStackReg(), Ty) << ")";
}
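
// For example (a sketch of the emitted text): a register-allocated variable
// prints as something like "%eax", a stack variable 16 bytes below the frame
// pointer prints as "-16(%ebp)", and a zero offset prints just "(%esp)" or
// "(%ebp)".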

x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  return x86::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
}

void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
       ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register. Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (ALLOW_DUMP)
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}

void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
  // Sort the variables into buckets according to the log of their width
  // in bytes.
  const SizeT NumBuckets =
      X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
  VarList Buckets[NumBuckets];

  for (Variable *Var : Source) {
    uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
    SizeT LogNaturalAlignment = llvm::findFirstSet(NaturalAlignment);
    assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
    assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
    SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
    Buckets[BucketIndex].push_back(Var);
  }

  for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
    VarList &List = Buckets[NumBuckets - I - 1];
    Dest.insert(Dest.end(), List.begin(), List.end());
  }
}
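
// For example: spilled variables with stack widths of 4, 16, and 8 bytes end
// up in Dest in the order 16, 8, 4, since the buckets are emitted from the
// largest alignment down to the smallest.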

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandX8632Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetX8632::stackSlotType() { return IceType_i32; }

void TargetX8632::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  // * X86_RET_IP_SIZE_BYTES: area 1
  // * PreservedRegsSizeBytes: area 2
  // * SpillAreaPaddingBytes: area 3
  // * GlobalsSize: area 4
  // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  // * LocalsSpillAreaSize: area 6
  // * SpillAreaSizeBytes: areas 3 - 7

  // Make a final pass over the Cfg to determine which variables need
  // stack slots.
  llvm::BitVector IsVarReferenced(Func->getNumVariables());
  for (CfgNode *Node : Func->getNodes()) {
    for (auto Inst = Node->getInsts().begin(), E = Node->getInsts().end();
         Inst != E; ++Inst) {
      if (Inst->isDeleted())
        continue;
      if (const Variable *Var = Inst->getDest())
        IsVarReferenced[Var->getIndex()] = true;
      for (SizeT I = 0; I < Inst->getSrcSize(); ++I) {
        Operand *Src = Inst->getSrc(I);
        SizeT NumVars = Src->getNumVars();
        for (SizeT J = 0; J < NumVars; ++J) {
          const Variable *Var = Src->getVar(J);
          IsVarReferenced[Var->getIndex()] = true;
        }
      }
    }
  }

  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames. If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks. E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function. In that case, basic blocks may be revisited, and variables
  // local to those basic blocks are actually live until after the
  // called function returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();
  size_t InArgsSizeBytes = 0;
  size_t PreservedRegsSizeBytes = 0;
  SpillAreaSizeBytes = 0;
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
  // SpillAreaSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  VarList SpilledVariables, SortedSpilledVariables, VariablesLinkedToSpillSlots;

  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a
    // register) or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (!IsVarReferenced[Var->getIndex()])
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
      assert(Var->getWeight() == RegWeight::Zero);
      if (!SpillVar->getLinkedTo()->hasReg()) {
        VariablesLinkedToSpillSlots.push_back(Var);
        continue;
      }
    }
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortByAlignment(SortedSpilledVariables, SpilledVariables);
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (!SpillAreaAlignmentBytes)
      SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
          SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!LocalsSlotsAlignmentBytes)
          LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      SpillAreaSizeBytes += Increment;
    }
  }
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;

  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill
  // areas.
  uint32_t SpillAreaPaddingBytes = 0;
  if (SpillAreaAlignmentBytes) {
    assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
    uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t SpillAreaStart =
        applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
    SpillAreaSizeBytes += SpillAreaPaddingBytes;
  }

  // If there are separate globals and locals areas, make sure the
  // locals area is aligned by padding the end of the globals area.
  uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
  if (LocalsSlotsAlignmentBytes) {
    assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
    GlobalsAndSubsequentPaddingSize =
        applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(RegX8632::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  unsigned NumXmmArgs = 0;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = GlobalsSpaceUsed;
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
  this->HasComputedFrame = true;

  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }

  if (ALLOW_DUMP && Func->getContext()->isVerbose(IceV_Frame)) {
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}

void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // TODO(stichnot): Use llvm::make_range with LLVM 3.5.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}

template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";

template <typename T> void TargetX8632::emitConstantPool() const {
  // Note: Still used by emit IAS.
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (Constant *C : Pool) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
    typename T::PrimitiveFpType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Str << ".L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
    Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
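
// For example (a sketch, assuming the f32 pool holds the single constant
// 1.5f with pool entry ID 0), the emitted text would look roughly like:
//   .section .rodata.cst4,"aM",@progbits,4
//   .align 4
//   .L$f32$0:
//   .long 0x3fc00000 # float 1.5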

void TargetX8632::emitConstants() const {
  // Note: Still used by emit IAS.
  emitConstantPool<PoolTypeConverter<float>>();
  emitConstantPool<PoolTypeConverter<double>>();

  // No need to emit constants from the int pool since (for x86) they
  // are embedded as immediates in the instructions.
}

void TargetX8632::split64(Variable *Var) {
  switch (Var->getType()) {
  default:
    return;
  case IceType_i64:
  // TODO: Only consider F64 if we need to push each half when
  // passing as an argument to a function call. Note that each half
  // is still typed as I32.
  case IceType_f64:
    break;
  }
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (ALLOW_DUMP) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}
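
// For example: splitting a 64-bit variable named "a" produces two i32
// variables that dump as "a__lo" and "a__hi" (when names are enabled),
// holding the low and high halves respectively.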

Operand *TargetX8632::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
                                   Mem->getOffset(), Mem->getIndex(),
                                   Mem->getShift(), Mem->getSegmentRegister());
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
1086
1087Operand *TargetX8632::hiOperand(Operand *Operand) {
1088 assert(Operand->getType() == IceType_i64);
1089 if (Operand->getType() != IceType_i64)
1090 return Operand;
1091 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1092 split64(Var);
1093 return Var->getHi();
1094 }
Jan Voungbc004632014-09-16 15:09:10 -07001095 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1096 return Ctx->getConstantInt32(
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001097 static_cast<uint32_t>(Const->getValue() >> 32));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001098 }
1099 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1100 Constant *Offset = Mem->getOffset();
Jim Stichnothae953202014-12-20 06:17:49 -08001101 if (Offset == nullptr) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001102 Offset = Ctx->getConstantInt32(4);
1103 } else if (ConstantInteger32 *IntOffset =
1104 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1105 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001106 } else if (ConstantRelocatable *SymOffset =
1107 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
Jan Voungfe14fb82014-10-13 15:56:32 -07001108 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001109 Offset =
1110 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1111 SymOffset->getSuppressMangling());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001112 }
1113 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001114 Mem->getIndex(), Mem->getShift(),
1115 Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001116 }
1117 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001118 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001119}
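// Example of the lo/hi pair produced above for a memory operand: if Operand
// is an i64 located at [ebp+8], loOperand() yields an i32 access to [ebp+8]
// and hiOperand() yields an i32 access to [ebp+12] (the same address plus 4),
// matching the little-endian layout of the two halves.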
1120
1121llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
1122 RegSetMask Exclude) const {
Jan Voungbd385e42014-09-18 18:18:10 -07001123 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001124
Jan Voungbd385e42014-09-18 18:18:10 -07001125#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001126 frameptr, isI8, isInt, isFP) \
1127 if (scratch && (Include & RegSet_CallerSave)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001128 Registers[RegX8632::val] = true; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001129 if (preserved && (Include & RegSet_CalleeSave)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001130 Registers[RegX8632::val] = true; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001131 if (stackptr && (Include & RegSet_StackPointer)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001132 Registers[RegX8632::val] = true; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001133 if (frameptr && (Include & RegSet_FramePointer)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001134 Registers[RegX8632::val] = true; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001135 if (scratch && (Exclude & RegSet_CallerSave)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001136 Registers[RegX8632::val] = false; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001137 if (preserved && (Exclude & RegSet_CalleeSave)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001138 Registers[RegX8632::val] = false; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001139 if (stackptr && (Exclude & RegSet_StackPointer)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001140 Registers[RegX8632::val] = false; \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001141 if (frameptr && (Exclude & RegSet_FramePointer)) \
Jan Voungbd385e42014-09-18 18:18:10 -07001142 Registers[RegX8632::val] = false;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001143
1144 REGX8632_TABLE
1145
1146#undef X
1147
1148 return Registers;
1149}
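// Usage sketch (assuming the RegSet_* values can be combined as bit flags,
// which the & tests above suggest): a caller wanting all general-purpose
// registers except the frame pointer could ask for
//   getRegisterSet(RegSet_CallerSave | RegSet_CalleeSave, RegSet_FramePointer);
// Exclude bits are applied after Include bits, so they win on any overlap.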
1150
1151void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
1152 IsEbpBasedFrame = true;
Matt Wala105b7042014-08-11 19:56:19 -07001153 // Conservatively require the stack to be aligned. Some stack
1154 // adjustment operations implemented below assume that the stack is
1155 // aligned before the alloca. All the alloca code ensures that the
1156 // stack alignment is preserved after the alloca. The stack alignment
1157 // restriction can be relaxed in some cases.
1158 NeedsStackAlignment = true;
1159
1160 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
Jan Voungbd385e42014-09-18 18:18:10 -07001161 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001162 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1163 Variable *Dest = Inst->getDest();
Matt Wala105b7042014-08-11 19:56:19 -07001164 uint32_t AlignmentParam = Inst->getAlignInBytes();
Jim Stichnoth72a8f8d2014-09-08 17:56:50 -07001165 // For default align=0, set it to the real value 1, to avoid any
1166 // bit-manipulation problems below.
1167 AlignmentParam = std::max(AlignmentParam, 1u);
Matt Wala105b7042014-08-11 19:56:19 -07001168
1169 // LLVM enforces power of 2 alignment.
1170 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1171 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1172
1173 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1174 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001175 _and(esp, Ctx->getConstantInt32(-Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001176 }
Jan Voungbc004632014-09-16 15:09:10 -07001177 if (ConstantInteger32 *ConstantTotalSize =
1178 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
Matt Wala105b7042014-08-11 19:56:19 -07001179 uint32_t Value = ConstantTotalSize->getValue();
Matt Walad4799f42014-08-14 14:24:12 -07001180 Value = applyAlignment(Value, Alignment);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001181 _sub(esp, Ctx->getConstantInt32(Value));
Matt Wala105b7042014-08-11 19:56:19 -07001182 } else {
1183 // Non-constant sizes need to be adjusted to the next highest
1184 // multiple of the required alignment at runtime.
1185 Variable *T = makeReg(IceType_i32);
1186 _mov(T, TotalSize);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001187 _add(T, Ctx->getConstantInt32(Alignment - 1));
1188 _and(T, Ctx->getConstantInt32(-Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001189 _sub(esp, T);
1190 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001191 _mov(Dest, esp);
1192}
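// Worked example of the arithmetic above: for an alloca of 20 bytes with a
// requested alignment of 32 (larger than X86_STACK_ALIGNMENT_BYTES), esp is
// first rounded down with "and esp, -32", the constant size is rounded up to
// applyAlignment(20, 32) == 32, and then "sub esp, 32" is emitted. For a
// non-constant size N the same rounding happens at runtime as (N + 31) & -32.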
1193
1194void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1195 Variable *Dest = Inst->getDest();
1196 Operand *Src0 = legalize(Inst->getSrc(0));
1197 Operand *Src1 = legalize(Inst->getSrc(1));
1198 if (Dest->getType() == IceType_i64) {
1199 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1200 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1201 Operand *Src0Lo = loOperand(Src0);
1202 Operand *Src0Hi = hiOperand(Src0);
1203 Operand *Src1Lo = loOperand(Src1);
1204 Operand *Src1Hi = hiOperand(Src1);
Jim Stichnothae953202014-12-20 06:17:49 -08001205 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001206 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001207 case InstArithmetic::_num:
1208 llvm_unreachable("Unknown arithmetic operator");
1209 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001210 case InstArithmetic::Add:
1211 _mov(T_Lo, Src0Lo);
1212 _add(T_Lo, Src1Lo);
1213 _mov(DestLo, T_Lo);
1214 _mov(T_Hi, Src0Hi);
1215 _adc(T_Hi, Src1Hi);
1216 _mov(DestHi, T_Hi);
1217 break;
1218 case InstArithmetic::And:
1219 _mov(T_Lo, Src0Lo);
1220 _and(T_Lo, Src1Lo);
1221 _mov(DestLo, T_Lo);
1222 _mov(T_Hi, Src0Hi);
1223 _and(T_Hi, Src1Hi);
1224 _mov(DestHi, T_Hi);
1225 break;
1226 case InstArithmetic::Or:
1227 _mov(T_Lo, Src0Lo);
1228 _or(T_Lo, Src1Lo);
1229 _mov(DestLo, T_Lo);
1230 _mov(T_Hi, Src0Hi);
1231 _or(T_Hi, Src1Hi);
1232 _mov(DestHi, T_Hi);
1233 break;
1234 case InstArithmetic::Xor:
1235 _mov(T_Lo, Src0Lo);
1236 _xor(T_Lo, Src1Lo);
1237 _mov(DestLo, T_Lo);
1238 _mov(T_Hi, Src0Hi);
1239 _xor(T_Hi, Src1Hi);
1240 _mov(DestHi, T_Hi);
1241 break;
1242 case InstArithmetic::Sub:
1243 _mov(T_Lo, Src0Lo);
1244 _sub(T_Lo, Src1Lo);
1245 _mov(DestLo, T_Lo);
1246 _mov(T_Hi, Src0Hi);
1247 _sbb(T_Hi, Src1Hi);
1248 _mov(DestHi, T_Hi);
1249 break;
1250 case InstArithmetic::Mul: {
Jim Stichnothae953202014-12-20 06:17:49 -08001251 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jan Voungbd385e42014-09-18 18:18:10 -07001252 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1253 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001254 // gcc does the following:
1255 // a=b*c ==>
1256 // t1 = b.hi; t1 *=(imul) c.lo
1257 // t2 = c.hi; t2 *=(imul) b.lo
1258 // t3:eax = b.lo
1259 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1260 // a.lo = t4.lo
1261 // t4.hi += t1
1262 // t4.hi += t2
1263 // a.hi = t4.hi
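      // Why this works: with b = bHi*2^32 + bLo and c = cHi*2^32 + cLo,
      // b*c mod 2^64 = bLo*cLo + ((bHi*cLo + bLo*cHi) << 32); the bHi*cHi
      // term falls entirely outside the low 64 bits. t4 is the full 64-bit
      // bLo*cLo product, and t1/t2 are the two cross terms added into its
      // high half.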
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07001264 // The mul instruction cannot take an immediate operand.
1265 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001266 _mov(T_1, Src0Hi);
1267 _imul(T_1, Src1Lo);
1268 _mov(T_2, Src1Hi);
1269 _imul(T_2, Src0Lo);
Jan Voungbd385e42014-09-18 18:18:10 -07001270 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001271 _mul(T_4Lo, T_3, Src1Lo);
      // The one-operand mul instruction produces its 64-bit result in the
      // register pair edx:eax, i.e. it defines two variables. We create a
      // fake definition of edx to account for the extra output.
1274 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1275 _mov(DestLo, T_4Lo);
1276 _add(T_4Hi, T_1);
1277 _add(T_4Hi, T_2);
1278 _mov(DestHi, T_4Hi);
1279 } break;
1280 case InstArithmetic::Shl: {
1281 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1282 // gcc does the following:
1283 // a=b<<c ==>
1284 // t1:ecx = c.lo & 0xff
1285 // t2 = b.lo
1286 // t3 = b.hi
1287 // t3 = shld t3, t2, t1
1288 // t2 = shl t2, t1
1289 // test t1, 0x20
1290 // je L1
1291 // use(t3)
1292 // t3 = t2
1293 // t2 = 0
1294 // L1:
1295 // a.lo = t2
1296 // a.hi = t3
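      // The test/je fixup is needed because the hardware masks 32-bit shift
      // counts to 5 bits, so shld/shl by c really shift by c & 31. Sketch for
      // c = 40: the masked shift is 8, bit 0x20 of c is set, and the fixup
      // yields a.hi = b.lo << 8 and a.lo = 0, which is the correct b << 40.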
Jim Stichnothae953202014-12-20 06:17:49 -08001297 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001298 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001299 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001300 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001301 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001302 _mov(T_2, Src0Lo);
1303 _mov(T_3, Src0Hi);
1304 _shld(T_3, T_2, T_1);
1305 _shl(T_2, T_1);
1306 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001307 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001308 // T_2 and T_3 are being assigned again because of the
1309 // intra-block control flow, so we need the _mov_nonkillable
1310 // variant to avoid liveness problems.
1311 _mov_nonkillable(T_3, T_2);
1312 _mov_nonkillable(T_2, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001313 Context.insert(Label);
1314 _mov(DestLo, T_2);
1315 _mov(DestHi, T_3);
1316 } break;
1317 case InstArithmetic::Lshr: {
1318 // a=b>>c (unsigned) ==>
1319 // t1:ecx = c.lo & 0xff
1320 // t2 = b.lo
1321 // t3 = b.hi
1322 // t2 = shrd t2, t3, t1
1323 // t3 = shr t3, t1
1324 // test t1, 0x20
1325 // je L1
1326 // use(t2)
1327 // t2 = t3
1328 // t3 = 0
1329 // L1:
1330 // a.lo = t2
1331 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001332 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001333 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001334 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001335 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001336 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001337 _mov(T_2, Src0Lo);
1338 _mov(T_3, Src0Hi);
1339 _shrd(T_2, T_3, T_1);
1340 _shr(T_3, T_1);
1341 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001342 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001343 // T_2 and T_3 are being assigned again because of the
1344 // intra-block control flow, so we need the _mov_nonkillable
1345 // variant to avoid liveness problems.
1346 _mov_nonkillable(T_2, T_3);
1347 _mov_nonkillable(T_3, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001348 Context.insert(Label);
1349 _mov(DestLo, T_2);
1350 _mov(DestHi, T_3);
1351 } break;
1352 case InstArithmetic::Ashr: {
1353 // a=b>>c (signed) ==>
1354 // t1:ecx = c.lo & 0xff
1355 // t2 = b.lo
1356 // t3 = b.hi
1357 // t2 = shrd t2, t3, t1
1358 // t3 = sar t3, t1
1359 // test t1, 0x20
1360 // je L1
1361 // use(t2)
1362 // t2 = t3
1363 // t3 = sar t3, 0x1f
1364 // L1:
1365 // a.lo = t2
1366 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001367 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001368 Constant *BitTest = Ctx->getConstantInt32(0x20);
1369 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001370 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001371 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001372 _mov(T_2, Src0Lo);
1373 _mov(T_3, Src0Hi);
1374 _shrd(T_2, T_3, T_1);
1375 _sar(T_3, T_1);
1376 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001377 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001378 // T_2 and T_3 are being assigned again because of the
1379 // intra-block control flow, so T_2 needs the _mov_nonkillable
1380 // variant to avoid liveness problems. T_3 doesn't need special
1381 // treatment because it is reassigned via _sar instead of _mov.
1382 _mov_nonkillable(T_2, T_3);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001383 _sar(T_3, SignExtend);
1384 Context.insert(Label);
1385 _mov(DestLo, T_2);
1386 _mov(DestHi, T_3);
1387 } break;
1388 case InstArithmetic::Udiv: {
1389 const SizeT MaxSrcs = 2;
1390 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
1391 Call->addArg(Inst->getSrc(0));
1392 Call->addArg(Inst->getSrc(1));
1393 lowerCall(Call);
1394 } break;
1395 case InstArithmetic::Sdiv: {
1396 const SizeT MaxSrcs = 2;
1397 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
1398 Call->addArg(Inst->getSrc(0));
1399 Call->addArg(Inst->getSrc(1));
1400 lowerCall(Call);
1401 } break;
1402 case InstArithmetic::Urem: {
1403 const SizeT MaxSrcs = 2;
1404 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
1405 Call->addArg(Inst->getSrc(0));
1406 Call->addArg(Inst->getSrc(1));
1407 lowerCall(Call);
1408 } break;
1409 case InstArithmetic::Srem: {
1410 const SizeT MaxSrcs = 2;
1411 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
1412 Call->addArg(Inst->getSrc(0));
1413 Call->addArg(Inst->getSrc(1));
1414 lowerCall(Call);
1415 } break;
1416 case InstArithmetic::Fadd:
1417 case InstArithmetic::Fsub:
1418 case InstArithmetic::Fmul:
1419 case InstArithmetic::Fdiv:
1420 case InstArithmetic::Frem:
1421 llvm_unreachable("FP instruction with i64 type");
1422 break;
1423 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001424 } else if (isVectorType(Dest->getType())) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001425 // TODO: Trap on integer divide and integer modulo by zero.
1426 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
Matt Wala8d1072e2014-07-11 15:43:51 -07001427 switch (Inst->getOp()) {
1428 case InstArithmetic::_num:
1429 llvm_unreachable("Unknown arithmetic operator");
1430 break;
Matt Wala7fa22d82014-07-17 12:41:31 -07001431 case InstArithmetic::Add: {
1432 Variable *T = makeReg(Dest->getType());
1433 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001434 _padd(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001435 _movp(Dest, T);
1436 } break;
1437 case InstArithmetic::And: {
1438 Variable *T = makeReg(Dest->getType());
1439 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001440 _pand(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001441 _movp(Dest, T);
1442 } break;
1443 case InstArithmetic::Or: {
1444 Variable *T = makeReg(Dest->getType());
1445 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001446 _por(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001447 _movp(Dest, T);
1448 } break;
1449 case InstArithmetic::Xor: {
1450 Variable *T = makeReg(Dest->getType());
1451 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001452 _pxor(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001453 _movp(Dest, T);
1454 } break;
1455 case InstArithmetic::Sub: {
1456 Variable *T = makeReg(Dest->getType());
1457 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001458 _psub(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001459 _movp(Dest, T);
1460 } break;
1461 case InstArithmetic::Mul: {
Matt Wala0a450512014-07-30 12:44:39 -07001462 bool TypesAreValidForPmull =
1463 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1464 bool InstructionSetIsValidForPmull =
1465 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1466 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1467 Variable *T = makeReg(Dest->getType());
1468 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001469 _pmull(T, Src1);
Matt Wala0a450512014-07-30 12:44:39 -07001470 _movp(Dest, T);
1471 } else if (Dest->getType() == IceType_v4i32) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001472 // Lowering sequence:
1473 // Note: The mask arguments have index 0 on the left.
1474 //
1475 // movups T1, Src0
1476 // pshufd T2, Src0, {1,0,3,0}
1477 // pshufd T3, Src1, {1,0,3,0}
1478 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1479 // pmuludq T1, Src1
1480 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1481 // pmuludq T2, T3
1482 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1483 // shufps T1, T2, {0,2,0,2}
1484 // pshufd T4, T1, {0,2,1,3}
1485 // movups Dest, T4
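        // Background for this sequence: SSE2 has no packed 32x32->32
        // multiply (pmulld is SSE4.1, which is why the pmull path above
        // requires it), but pmuludq multiplies the two even-numbered lanes
        // and yields 64-bit products. The pshufd shuffles move the odd
        // lanes into even positions so a second pmuludq covers them, and
        // shufps/pshufd then gather the four low 32-bit halves back into
        // lane order.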
Matt Wala7fa22d82014-07-17 12:41:31 -07001486
1487 // Mask that directs pshufd to create a vector with entries
1488 // Src[1, 0, 3, 0]
1489 const unsigned Constant1030 = 0x31;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001490 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
Matt Wala7fa22d82014-07-17 12:41:31 -07001491 // Mask that directs shufps to create a vector with entries
1492 // Dest[0, 2], Src[0, 2]
1493 const unsigned Mask0202 = 0x88;
1494 // Mask that directs pshufd to create a vector with entries
1495 // Src[0, 2, 1, 3]
1496 const unsigned Mask0213 = 0xd8;
1497 Variable *T1 = makeReg(IceType_v4i32);
1498 Variable *T2 = makeReg(IceType_v4i32);
1499 Variable *T3 = makeReg(IceType_v4i32);
1500 Variable *T4 = makeReg(IceType_v4i32);
1501 _movp(T1, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001502 _pshufd(T2, Src0, Mask1030);
1503 _pshufd(T3, Src1, Mask1030);
1504 _pmuludq(T1, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001505 _pmuludq(T2, T3);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001506 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1507 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
Matt Wala7fa22d82014-07-17 12:41:31 -07001508 _movp(Dest, T4);
Matt Wala7fa22d82014-07-17 12:41:31 -07001509 } else {
1510 assert(Dest->getType() == IceType_v16i8);
Matt Walaafeaee42014-08-07 13:47:30 -07001511 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001512 }
1513 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001514 case InstArithmetic::Shl:
1515 case InstArithmetic::Lshr:
1516 case InstArithmetic::Ashr:
1517 case InstArithmetic::Udiv:
1518 case InstArithmetic::Urem:
1519 case InstArithmetic::Sdiv:
1520 case InstArithmetic::Srem:
1521 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1522 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001523 case InstArithmetic::Fadd: {
1524 Variable *T = makeReg(Dest->getType());
1525 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001526 _addps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001527 _movp(Dest, T);
1528 } break;
1529 case InstArithmetic::Fsub: {
1530 Variable *T = makeReg(Dest->getType());
1531 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001532 _subps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001533 _movp(Dest, T);
1534 } break;
1535 case InstArithmetic::Fmul: {
1536 Variable *T = makeReg(Dest->getType());
1537 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001538 _mulps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001539 _movp(Dest, T);
1540 } break;
1541 case InstArithmetic::Fdiv: {
1542 Variable *T = makeReg(Dest->getType());
1543 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001544 _divps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001545 _movp(Dest, T);
1546 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001547 case InstArithmetic::Frem:
1548 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1549 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001550 }
1551 } else { // Dest->getType() is non-i64 scalar
Jim Stichnothae953202014-12-20 06:17:49 -08001552 Variable *T_edx = nullptr;
1553 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001554 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001555 case InstArithmetic::_num:
1556 llvm_unreachable("Unknown arithmetic operator");
1557 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001558 case InstArithmetic::Add:
1559 _mov(T, Src0);
1560 _add(T, Src1);
1561 _mov(Dest, T);
1562 break;
1563 case InstArithmetic::And:
1564 _mov(T, Src0);
1565 _and(T, Src1);
1566 _mov(Dest, T);
1567 break;
1568 case InstArithmetic::Or:
1569 _mov(T, Src0);
1570 _or(T, Src1);
1571 _mov(Dest, T);
1572 break;
1573 case InstArithmetic::Xor:
1574 _mov(T, Src0);
1575 _xor(T, Src1);
1576 _mov(Dest, T);
1577 break;
1578 case InstArithmetic::Sub:
1579 _mov(T, Src0);
1580 _sub(T, Src1);
1581 _mov(Dest, T);
1582 break;
1583 case InstArithmetic::Mul:
1584 // TODO: Optimize for llvm::isa<Constant>(Src1)
1585 // TODO: Strength-reduce multiplications by a constant,
1586 // particularly -1 and powers of 2. Advanced: use lea to
1587 // multiply by 3, 5, 9.
1588 //
      // The 8-bit version of imul only has the one-operand form "imul r/m8",
      // which multiplies by al and writes the result to ax, so T must be
      // allocated to eax.
Jan Voung0ac50dc2014-09-30 08:36:06 -07001591 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001592 _mov(T, Src0, RegX8632::Reg_eax);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001593 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1594 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001595 _mov(T, Src0);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001596 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001597 _imul(T, Src1);
1598 _mov(Dest, T);
1599 break;
1600 case InstArithmetic::Shl:
1601 _mov(T, Src0);
1602 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001603 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001604 _shl(T, Src1);
1605 _mov(Dest, T);
1606 break;
1607 case InstArithmetic::Lshr:
1608 _mov(T, Src0);
1609 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001610 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001611 _shr(T, Src1);
1612 _mov(Dest, T);
1613 break;
1614 case InstArithmetic::Ashr:
1615 _mov(T, Src0);
1616 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001617 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001618 _sar(T, Src1);
1619 _mov(Dest, T);
1620 break;
1621 case InstArithmetic::Udiv:
      // div and idiv are among the few arithmetic instructions that do not
      // allow an immediate operand.
1624 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
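      // For reference: the 8-bit div/idiv forms take the dividend in ax
      // (ah:al) and return the quotient in al and the remainder in ah, while
      // the 32-bit forms take edx:eax and return the quotient in eax and the
      // remainder in edx. That is why ah/edx is zeroed (or sign-extended via
      // cbwdq) before each divide below.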
Jan Voung3a569182014-09-29 10:16:01 -07001625 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnothae953202014-12-20 06:17:49 -08001626 Variable *T_ah = nullptr;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001627 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001628 _mov(T, Src0, RegX8632::Reg_eax);
1629 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001630 _div(T, Src1, T_ah);
1631 _mov(Dest, T);
1632 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001633 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001634 _mov(T, Src0, RegX8632::Reg_eax);
1635 _mov(T_edx, Zero, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001636 _div(T, Src1, T_edx);
1637 _mov(Dest, T);
1638 }
1639 break;
1640 case InstArithmetic::Sdiv:
Jan Voung70d68832014-06-17 10:02:37 -07001641 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001642 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001643 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001644 _cbwdq(T, T);
1645 _idiv(T, Src1, T);
1646 _mov(Dest, T);
1647 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001648 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1649 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001650 _cbwdq(T_edx, T);
1651 _idiv(T, Src1, T_edx);
1652 _mov(Dest, T);
1653 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001654 break;
1655 case InstArithmetic::Urem:
Jan Voung70d68832014-06-17 10:02:37 -07001656 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001657 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnothae953202014-12-20 06:17:49 -08001658 Variable *T_ah = nullptr;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001659 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001660 _mov(T, Src0, RegX8632::Reg_eax);
1661 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001662 _div(T_ah, Src1, T);
1663 _mov(Dest, T_ah);
1664 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001665 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001666 _mov(T_edx, Zero, RegX8632::Reg_edx);
1667 _mov(T, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001668 _div(T_edx, Src1, T);
1669 _mov(Dest, T_edx);
1670 }
1671 break;
1672 case InstArithmetic::Srem:
Jan Voung70d68832014-06-17 10:02:37 -07001673 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001674 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001675 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1676 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001677 _cbwdq(T, T);
1678 Context.insert(InstFakeDef::create(Func, T_ah));
1679 _idiv(T_ah, Src1, T);
1680 _mov(Dest, T_ah);
1681 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001682 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1683 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001684 _cbwdq(T_edx, T);
1685 _idiv(T_edx, Src1, T);
1686 _mov(Dest, T_edx);
1687 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001688 break;
1689 case InstArithmetic::Fadd:
1690 _mov(T, Src0);
1691 _addss(T, Src1);
1692 _mov(Dest, T);
1693 break;
1694 case InstArithmetic::Fsub:
1695 _mov(T, Src0);
1696 _subss(T, Src1);
1697 _mov(Dest, T);
1698 break;
1699 case InstArithmetic::Fmul:
1700 _mov(T, Src0);
1701 _mulss(T, Src1);
1702 _mov(Dest, T);
1703 break;
1704 case InstArithmetic::Fdiv:
1705 _mov(T, Src0);
1706 _divss(T, Src1);
1707 _mov(Dest, T);
1708 break;
1709 case InstArithmetic::Frem: {
1710 const SizeT MaxSrcs = 2;
1711 Type Ty = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07001712 InstCall *Call = makeHelperCall(
1713 isFloat32Asserting32Or64(Ty) ? "fmodf" : "fmod", Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001714 Call->addArg(Src0);
1715 Call->addArg(Src1);
1716 return lowerCall(Call);
1717 } break;
1718 }
1719 }
1720}
1721
1722void TargetX8632::lowerAssign(const InstAssign *Inst) {
1723 Variable *Dest = Inst->getDest();
1724 Operand *Src0 = Inst->getSrc(0);
1725 assert(Dest->getType() == Src0->getType());
1726 if (Dest->getType() == IceType_i64) {
1727 Src0 = legalize(Src0);
1728 Operand *Src0Lo = loOperand(Src0);
1729 Operand *Src0Hi = hiOperand(Src0);
1730 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1731 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Jim Stichnothae953202014-12-20 06:17:49 -08001732 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001733 _mov(T_Lo, Src0Lo);
1734 _mov(DestLo, T_Lo);
1735 _mov(T_Hi, Src0Hi);
1736 _mov(DestHi, T_Hi);
1737 } else {
Jim Stichnoth336f6c42014-10-30 15:01:31 -07001738 // If Dest is in memory, then RI is either a physical register or
1739 // an immediate, otherwise RI can be anything.
1740 Operand *RI =
1741 legalize(Src0, Dest->hasReg() ? Legal_All : Legal_Reg | Legal_Imm);
Matt Wala45a06232014-07-09 16:33:22 -07001742 if (isVectorType(Dest->getType()))
1743 _movp(Dest, RI);
1744 else
1745 _mov(Dest, RI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001746 }
1747}
1748
1749void TargetX8632::lowerBr(const InstBr *Inst) {
1750 if (Inst->isUnconditional()) {
1751 _br(Inst->getTargetUnconditional());
1752 } else {
Jim Stichnoth206833c2014-08-07 10:58:05 -07001753 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001754 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001755 _cmp(Src0, Zero);
Jan Voungbd385e42014-09-18 18:18:10 -07001756 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001757 }
1758}
1759
1760void TargetX8632::lowerCall(const InstCall *Instr) {
Matt Wala105b7042014-08-11 19:56:19 -07001761 // x86-32 calling convention:
1762 //
1763 // * At the point before the call, the stack must be aligned to 16
1764 // bytes.
1765 //
1766 // * The first four arguments of vector type, regardless of their
1767 // position relative to the other arguments in the argument list, are
1768 // placed in registers xmm0 - xmm3.
1769 //
1770 // * Other arguments are pushed onto the stack in right-to-left order,
1771 // such that the left-most argument ends up on the top of the stack at
1772 // the lowest memory address.
1773 //
1774 // * Stack arguments of vector type are aligned to start at the next
1775 // highest multiple of 16 bytes. Other stack arguments are aligned to
1776 // 4 bytes.
1777 //
  // This is intended to match the "IA-32 Function Calling Convention"
  // section of Apple's "OS X ABI Function Call Guide".
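  // A worked placement example under these rules (illustrative only): for a
  // call f(i32 a, v4f32 b, i32 c, v4f32 d), b and d are passed in xmm0 and
  // xmm1, a and c are stored to [esp+0] and [esp+4] in the argument area,
  // and the 8-byte area is then rounded up to a multiple of 16 before the
  // call.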
1781 NeedsStackAlignment = true;
1782
Jim Stichnoth1502e592014-12-11 09:22:45 -08001783 typedef std::vector<Operand *> OperandList;
Matt Wala105b7042014-08-11 19:56:19 -07001784 OperandList XmmArgs;
1785 OperandList StackArgs, StackArgLocations;
1786 uint32_t ParameterAreaSizeBytes = 0;
1787
Matt Wala45a06232014-07-09 16:33:22 -07001788 // Classify each argument operand according to the location where the
1789 // argument is passed.
Matt Wala45a06232014-07-09 16:33:22 -07001790 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
1791 Operand *Arg = Instr->getArg(i);
Matt Wala105b7042014-08-11 19:56:19 -07001792 Type Ty = Arg->getType();
1793 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
Jan Voung3a569182014-09-29 10:16:01 -07001794 assert(typeWidthInBytes(Ty) >= 4);
Matt Wala105b7042014-08-11 19:56:19 -07001795 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
Matt Wala45a06232014-07-09 16:33:22 -07001796 XmmArgs.push_back(Arg);
1797 } else {
1798 StackArgs.push_back(Arg);
Matt Wala105b7042014-08-11 19:56:19 -07001799 if (isVectorType(Arg->getType())) {
1800 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1801 }
Jan Voungbd385e42014-09-18 18:18:10 -07001802 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001803 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
Matt Wala105b7042014-08-11 19:56:19 -07001804 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1805 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
Matt Wala45a06232014-07-09 16:33:22 -07001806 }
1807 }
Matt Wala105b7042014-08-11 19:56:19 -07001808
1809 // Adjust the parameter area so that the stack is aligned. It is
1810 // assumed that the stack is already aligned at the start of the
1811 // calling sequence.
1812 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1813
1814 // Subtract the appropriate amount for the argument area. This also
1815 // takes care of setting the stack adjustment during emission.
Matt Wala45a06232014-07-09 16:33:22 -07001816 //
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001817 // TODO: If for some reason the call instruction gets dead-code
1818 // eliminated after lowering, we would need to ensure that the
Matt Wala105b7042014-08-11 19:56:19 -07001819 // pre-call and the post-call esp adjustment get eliminated as well.
1820 if (ParameterAreaSizeBytes) {
1821 _adjust_stack(ParameterAreaSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001822 }
Matt Wala105b7042014-08-11 19:56:19 -07001823
1824 // Copy arguments that are passed on the stack to the appropriate
1825 // stack locations.
1826 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
1827 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
Matt Wala105b7042014-08-11 19:56:19 -07001828 }
1829
Matt Wala45a06232014-07-09 16:33:22 -07001830 // Copy arguments to be passed in registers to the appropriate
1831 // registers.
1832 // TODO: Investigate the impact of lowering arguments passed in
1833 // registers after lowering stack arguments as opposed to the other
1834 // way around. Lowering register arguments after stack arguments may
1835 // reduce register pressure. On the other hand, lowering register
1836 // arguments first (before stack arguments) may result in more compact
1837 // code, as the memory operand displacements may end up being smaller
1838 // before any stack adjustment is done.
1839 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Jim Stichnothad403532014-09-25 12:44:17 -07001840 Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
Matt Wala45a06232014-07-09 16:33:22 -07001841 // Generate a FakeUse of register arguments so that they do not get
1842 // dead code eliminated as a result of the FakeKill of scratch
1843 // registers after the call.
1844 Context.insert(InstFakeUse::create(Func, Reg));
1845 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001846 // Generate the call instruction. Assign its result to a temporary
1847 // with high register allocation weight.
1848 Variable *Dest = Instr->getDest();
Matt Wala45a06232014-07-09 16:33:22 -07001849 // ReturnReg doubles as ReturnRegLo as necessary.
Jim Stichnothae953202014-12-20 06:17:49 -08001850 Variable *ReturnReg = nullptr;
1851 Variable *ReturnRegHi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001852 if (Dest) {
1853 switch (Dest->getType()) {
1854 case IceType_NUM:
1855 llvm_unreachable("Invalid Call dest type");
1856 break;
1857 case IceType_void:
1858 break;
1859 case IceType_i1:
1860 case IceType_i8:
1861 case IceType_i16:
1862 case IceType_i32:
Jan Voungbd385e42014-09-18 18:18:10 -07001863 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001864 break;
1865 case IceType_i64:
Jan Voungbd385e42014-09-18 18:18:10 -07001866 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
1867 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001868 break;
1869 case IceType_f32:
1870 case IceType_f64:
Jim Stichnothae953202014-12-20 06:17:49 -08001871 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
Matt Wala45a06232014-07-09 16:33:22 -07001872 // the fstp instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001873 break;
Matt Wala928f1292014-07-07 16:50:46 -07001874 case IceType_v4i1:
1875 case IceType_v8i1:
1876 case IceType_v16i1:
1877 case IceType_v16i8:
1878 case IceType_v8i16:
1879 case IceType_v4i32:
Matt Wala45a06232014-07-09 16:33:22 -07001880 case IceType_v4f32:
Jan Voungbd385e42014-09-18 18:18:10 -07001881 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
Matt Wala45a06232014-07-09 16:33:22 -07001882 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001883 }
1884 }
Jim Stichnothdd165072014-11-02 09:41:45 -08001885 Operand *CallTarget = legalize(Instr->getCallTarget());
Matt Wala45a06232014-07-09 16:33:22 -07001886 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001887 Context.insert(NewCall);
Matt Wala45a06232014-07-09 16:33:22 -07001888 if (ReturnRegHi)
1889 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001890
Matt Wala105b7042014-08-11 19:56:19 -07001891 // Add the appropriate offset to esp. The call instruction takes care
1892 // of resetting the stack offset during emission.
1893 if (ParameterAreaSizeBytes) {
Jan Voungbd385e42014-09-18 18:18:10 -07001894 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001895 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001896 }
1897
1898 // Insert a register-kill pseudo instruction.
Jim Stichnoth87ff3a12014-11-14 10:27:29 -08001899 Context.insert(InstFakeKill::create(Func, NewCall));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001900
1901 // Generate a FakeUse to keep the call live if necessary.
Matt Wala45a06232014-07-09 16:33:22 -07001902 if (Instr->hasSideEffects() && ReturnReg) {
1903 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001904 Context.insert(FakeUse);
1905 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001906
Matt Wala45a06232014-07-09 16:33:22 -07001907 if (!Dest)
1908 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001909
Matt Wala45a06232014-07-09 16:33:22 -07001910 // Assign the result of the call to Dest.
1911 if (ReturnReg) {
1912 if (ReturnRegHi) {
1913 assert(Dest->getType() == IceType_i64);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001914 split64(Dest);
1915 Variable *DestLo = Dest->getLo();
1916 Variable *DestHi = Dest->getHi();
Matt Wala45a06232014-07-09 16:33:22 -07001917 _mov(DestLo, ReturnReg);
1918 _mov(DestHi, ReturnRegHi);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001919 } else {
Matt Wala45a06232014-07-09 16:33:22 -07001920 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
1921 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
1922 isVectorType(Dest->getType()));
Matt Wala45a06232014-07-09 16:33:22 -07001923 if (isVectorType(Dest->getType())) {
1924 _movp(Dest, ReturnReg);
1925 } else {
1926 _mov(Dest, ReturnReg);
1927 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001928 }
Jan Voung3a569182014-09-29 10:16:01 -07001929 } else if (isScalarFloatingType(Dest->getType())) {
Matt Wala45a06232014-07-09 16:33:22 -07001930 // Special treatment for an FP function which returns its result in
1931 // st(0).
Matt Wala45a06232014-07-09 16:33:22 -07001932 // If Dest ends up being a physical xmm register, the fstp emit code
1933 // will route st(0) through a temporary stack slot.
Jim Stichnotha5229722014-09-12 13:06:09 -07001934 _fstp(Dest);
1935 // Create a fake use of Dest in case it actually isn't used,
1936 // because st(0) still needs to be popped.
1937 Context.insert(InstFakeUse::create(Func, Dest));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001938 }
1939}
1940
1941void TargetX8632::lowerCast(const InstCast *Inst) {
1942 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1943 InstCast::OpKind CastKind = Inst->getCastKind();
1944 Variable *Dest = Inst->getDest();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001945 switch (CastKind) {
1946 default:
1947 Func->setError("Cast type not supported");
1948 return;
Jan Voung1ee34162014-06-24 13:43:30 -07001949 case InstCast::Sext: {
1950 // Src0RM is the source operand legalized to physical register or memory,
1951 // but not immediate, since the relevant x86 native instructions don't
1952 // allow an immediate operand. If the operand is an immediate, we could
1953 // consider computing the strength-reduced result at translation time,
1954 // but we're unlikely to see something like that in the bitcode that
1955 // the optimizer wouldn't have already taken care of.
1956 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001957 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07001958 Type DestTy = Dest->getType();
1959 if (DestTy == IceType_v16i8) {
1960 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
1961 Variable *OneMask = makeVectorOfOnes(Dest->getType());
1962 Variable *T = makeReg(DestTy);
1963 _movp(T, Src0RM);
1964 _pand(T, OneMask);
1965 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1966 _pcmpgt(T, Zeros);
1967 _movp(Dest, T);
1968 } else {
1969 // width = width(elty) - 1; dest = (src << width) >> width
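        // E.g. for v8i16 the shift amount is 15, so an element holding
        // 0x0001 becomes 0xffff and 0x0000 stays 0x0000.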
1970 SizeT ShiftAmount =
1971 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001972 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
Matt Wala83b80362014-07-16 10:21:30 -07001973 Variable *T = makeReg(DestTy);
1974 _movp(T, Src0RM);
1975 _psll(T, ShiftConstant);
1976 _psra(T, ShiftConstant);
1977 _movp(Dest, T);
1978 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07001979 } else if (Dest->getType() == IceType_i64) {
1980 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001981 Constant *Shift = Ctx->getConstantInt32(31);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001982 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1983 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1984 Variable *T_Lo = makeReg(DestLo->getType());
1985 if (Src0RM->getType() == IceType_i32) {
1986 _mov(T_Lo, Src0RM);
1987 } else if (Src0RM->getType() == IceType_i1) {
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001988 _movzx(T_Lo, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001989 _shl(T_Lo, Shift);
1990 _sar(T_Lo, Shift);
1991 } else {
1992 _movsx(T_Lo, Src0RM);
1993 }
1994 _mov(DestLo, T_Lo);
Jim Stichnothae953202014-12-20 06:17:49 -08001995 Variable *T_Hi = nullptr;
Jim Stichnothdd30c812014-09-04 16:39:02 -07001996 _mov(T_Hi, T_Lo);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001997 if (Src0RM->getType() != IceType_i1)
1998 // For i1, the sar instruction is already done above.
1999 _sar(T_Hi, Shift);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002000 _mov(DestHi, T_Hi);
2001 } else if (Src0RM->getType() == IceType_i1) {
2002 // t1 = src
2003 // shl t1, dst_bitwidth - 1
2004 // sar t1, dst_bitwidth - 1
2005 // dst = t1
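      // E.g. for an i1 -> i32 sext, DestBits is 32: a source value of 1
      // becomes 0x80000000 after the shl and 0xffffffff (-1) after the sar,
      // while 0 stays 0.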
2006 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002007 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002008 Variable *T = makeReg(Dest->getType());
2009 if (typeWidthInBytes(Dest->getType()) <=
2010 typeWidthInBytes(Src0RM->getType())) {
2011 _mov(T, Src0RM);
2012 } else {
2013 // Widen the source using movsx or movzx. (It doesn't matter
2014 // which one, since the following shl/sar overwrite the bits.)
2015 _movzx(T, Src0RM);
2016 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002017 _shl(T, ShiftAmount);
2018 _sar(T, ShiftAmount);
2019 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002020 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002021 // t1 = movsx src; dst = t1
2022 Variable *T = makeReg(Dest->getType());
2023 _movsx(T, Src0RM);
2024 _mov(Dest, T);
2025 }
2026 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002027 }
2028 case InstCast::Zext: {
2029 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002030 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07002031 // onemask = materialize(1,1,...); dest = onemask & src
2032 Type DestTy = Dest->getType();
2033 Variable *OneMask = makeVectorOfOnes(DestTy);
2034 Variable *T = makeReg(DestTy);
2035 _movp(T, Src0RM);
2036 _pand(T, OneMask);
2037 _movp(Dest, T);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002038 } else if (Dest->getType() == IceType_i64) {
2039 // t1=movzx src; dst.lo=t1; dst.hi=0
2040 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2041 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2042 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2043 Variable *Tmp = makeReg(DestLo->getType());
2044 if (Src0RM->getType() == IceType_i32) {
2045 _mov(Tmp, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002046 } else {
2047 _movzx(Tmp, Src0RM);
2048 }
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002049 if (Src0RM->getType() == IceType_i1) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002050 Constant *One = Ctx->getConstantInt32(1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002051 _and(Tmp, One);
2052 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002053 _mov(DestLo, Tmp);
2054 _mov(DestHi, Zero);
2055 } else if (Src0RM->getType() == IceType_i1) {
2056 // t = Src0RM; t &= 1; Dest = t
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002057 Constant *One = Ctx->getConstantInt32(1);
Jan Voung39d4aca2014-10-15 15:16:54 -07002058 Type DestTy = Dest->getType();
2059 Variable *T;
2060 if (DestTy == IceType_i8) {
2061 T = makeReg(DestTy);
2062 _mov(T, Src0RM);
2063 } else {
        // Use a 32-bit register for both the i16 and i32 cases, since 32-bit
        // operations have shorter encodings.
2065 T = makeReg(IceType_i32);
2066 _movzx(T, Src0RM);
2067 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002068 _and(T, One);
2069 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002070 } else {
2071 // t1 = movzx src; dst = t1
2072 Variable *T = makeReg(Dest->getType());
2073 _movzx(T, Src0RM);
2074 _mov(Dest, T);
2075 }
2076 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002077 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002078 case InstCast::Trunc: {
Matt Wala83b80362014-07-16 10:21:30 -07002079 if (isVectorType(Dest->getType())) {
2080 // onemask = materialize(1,1,...); dst = src & onemask
2081 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2082 Type Src0Ty = Src0RM->getType();
2083 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2084 Variable *T = makeReg(Dest->getType());
2085 _movp(T, Src0RM);
2086 _pand(T, OneMask);
2087 _movp(Dest, T);
2088 } else {
2089 Operand *Src0 = Inst->getSrc(0);
2090 if (Src0->getType() == IceType_i64)
2091 Src0 = loOperand(Src0);
2092 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2093 // t1 = trunc Src0RM; Dest = t1
Jim Stichnothae953202014-12-20 06:17:49 -08002094 Variable *T = nullptr;
Matt Wala83b80362014-07-16 10:21:30 -07002095 _mov(T, Src0RM);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002096 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002097 _and(T, Ctx->getConstantInt1(1));
Matt Wala83b80362014-07-16 10:21:30 -07002098 _mov(Dest, T);
2099 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002100 break;
2101 }
2102 case InstCast::Fptrunc:
2103 case InstCast::Fpext: {
Jan Voung1ee34162014-06-24 13:43:30 -07002104 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002105 // t1 = cvt Src0RM; Dest = t1
2106 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002107 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002108 _mov(Dest, T);
2109 break;
2110 }
2111 case InstCast::Fptosi:
Matt Wala83b80362014-07-16 10:21:30 -07002112 if (isVectorType(Dest->getType())) {
2113 assert(Dest->getType() == IceType_v4i32 &&
2114 Inst->getSrc(0)->getType() == IceType_v4f32);
2115 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2116 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002117 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
Matt Wala83b80362014-07-16 10:21:30 -07002118 _movp(Dest, T);
2119 } else if (Dest->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002120 // Use a helper for converting floating-point values to 64-bit
2121 // integers. SSE2 appears to have no way to convert from xmm
2122 // registers to something like the edx:eax register pair, and
2123 // gcc and clang both want to use x87 instructions complete with
2124 // temporary manipulation of the status word. This helper is
2125 // not needed for x86-64.
2126 split64(Dest);
2127 const SizeT MaxSrcs = 1;
2128 Type SrcType = Inst->getSrc(0)->getType();
2129 InstCall *Call = makeHelperCall(
Jan Voung3a569182014-09-29 10:16:01 -07002130 isFloat32Asserting32Or64(SrcType) ? "cvtftosi64" : "cvtdtosi64", Dest,
2131 MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002132 // TODO: Call the correct compiler-rt helper function.
2133 Call->addArg(Inst->getSrc(0));
2134 lowerCall(Call);
2135 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002136 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002137 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2138 Variable *T_1 = makeReg(IceType_i32);
2139 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002140 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002141 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002142 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002143 _and(T_2, Ctx->getConstantInt1(1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002144 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002145 }
2146 break;
2147 case InstCast::Fptoui:
Matt Wala83b80362014-07-16 10:21:30 -07002148 if (isVectorType(Dest->getType())) {
2149 assert(Dest->getType() == IceType_v4i32 &&
2150 Inst->getSrc(0)->getType() == IceType_v4f32);
2151 const SizeT MaxSrcs = 1;
2152 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
2153 Call->addArg(Inst->getSrc(0));
2154 lowerCall(Call);
2155 } else if (Dest->getType() == IceType_i64 ||
2156 Dest->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002157 // Use a helper for both x86-32 and x86-64.
2158 split64(Dest);
2159 const SizeT MaxSrcs = 1;
2160 Type DestType = Dest->getType();
Jan Voung1ee34162014-06-24 13:43:30 -07002161 Type SrcType = Inst->getSrc(0)->getType();
Jan Voung3a569182014-09-29 10:16:01 -07002162 IceString DstSubstring = (isInt32Asserting32Or64(DestType) ? "32" : "64");
2163 IceString SrcSubstring = (isFloat32Asserting32Or64(SrcType) ? "f" : "d");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002164 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
2165 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
2166 // TODO: Call the correct compiler-rt helper function.
2167 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2168 Call->addArg(Inst->getSrc(0));
2169 lowerCall(Call);
2170 return;
2171 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002172 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002173 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2174 Variable *T_1 = makeReg(IceType_i32);
2175 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002176 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002177 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002178 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002179 _and(T_2, Ctx->getConstantInt1(1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002180 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002181 }
2182 break;
2183 case InstCast::Sitofp:
Matt Wala83b80362014-07-16 10:21:30 -07002184 if (isVectorType(Dest->getType())) {
2185 assert(Dest->getType() == IceType_v4f32 &&
2186 Inst->getSrc(0)->getType() == IceType_v4i32);
2187 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2188 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002189 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
Matt Wala83b80362014-07-16 10:21:30 -07002190 _movp(Dest, T);
2191 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002192 // Use a helper for x86-32.
2193 const SizeT MaxSrcs = 1;
2194 Type DestType = Dest->getType();
2195 InstCall *Call = makeHelperCall(
Jan Voung3a569182014-09-29 10:16:01 -07002196 isFloat32Asserting32Or64(DestType) ? "cvtsi64tof" : "cvtsi64tod",
2197 Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002198 // TODO: Call the correct compiler-rt helper function.
2199 Call->addArg(Inst->getSrc(0));
2200 lowerCall(Call);
2201 return;
2202 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002203 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002204 // Sign-extend the operand.
2205 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2206 Variable *T_1 = makeReg(IceType_i32);
2207 Variable *T_2 = makeReg(Dest->getType());
2208 if (Src0RM->getType() == IceType_i32)
2209 _mov(T_1, Src0RM);
2210 else
2211 _movsx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002212 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002213 _mov(Dest, T_2);
2214 }
2215 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002216 case InstCast::Uitofp: {
2217 Operand *Src0 = Inst->getSrc(0);
Matt Wala83b80362014-07-16 10:21:30 -07002218 if (isVectorType(Src0->getType())) {
2219 assert(Dest->getType() == IceType_v4f32 &&
2220 Src0->getType() == IceType_v4i32);
2221 const SizeT MaxSrcs = 1;
2222 InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
2223 Call->addArg(Src0);
2224 lowerCall(Call);
2225 } else if (Src0->getType() == IceType_i64 ||
2226 Src0->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002227 // Use a helper for x86-32 and x86-64. Also use a helper for
2228 // i32 on x86-32.
2229 const SizeT MaxSrcs = 1;
2230 Type DestType = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07002231 IceString SrcSubstring =
2232 (isInt32Asserting32Or64(Src0->getType()) ? "32" : "64");
2233 IceString DstSubstring = (isFloat32Asserting32Or64(DestType) ? "f" : "d");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002234 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
2235 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
2236 // TODO: Call the correct compiler-rt helper function.
2237 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
Jan Voung1ee34162014-06-24 13:43:30 -07002238 Call->addArg(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002239 lowerCall(Call);
2240 return;
2241 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002242 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002243 // Zero-extend the operand.
2244 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2245 Variable *T_1 = makeReg(IceType_i32);
2246 Variable *T_2 = makeReg(Dest->getType());
2247 if (Src0RM->getType() == IceType_i32)
2248 _mov(T_1, Src0RM);
2249 else
2250 _movzx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002251 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002252 _mov(Dest, T_2);
2253 }
2254 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002255 }
2256 case InstCast::Bitcast: {
2257 Operand *Src0 = Inst->getSrc(0);
2258 if (Dest->getType() == Src0->getType()) {
2259 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002260 lowerAssign(Assign);
2261 return;
2262 }
2263 switch (Dest->getType()) {
2264 default:
2265 llvm_unreachable("Unexpected Bitcast dest type");
Matt Wala83b80362014-07-16 10:21:30 -07002266 case IceType_i8: {
2267 assert(Src0->getType() == IceType_v8i1);
2268 InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
2269 Call->addArg(Src0);
2270 lowerCall(Call);
2271 } break;
2272 case IceType_i16: {
2273 assert(Src0->getType() == IceType_v16i1);
2274 InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
2275 Call->addArg(Src0);
2276 lowerCall(Call);
2277 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002278 case IceType_i32:
2279 case IceType_f32: {
Jan Voung1ee34162014-06-24 13:43:30 -07002280 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002281 Type DestType = Dest->getType();
2282 Type SrcType = Src0RM->getType();
Jim Stichnoth6e992142014-07-30 14:45:20 -07002283 (void)DestType;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002284 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2285 (DestType == IceType_f32 && SrcType == IceType_i32));
2286 // a.i32 = bitcast b.f32 ==>
2287 // t.f32 = b.f32
2288 // s.f32 = spill t.f32
2289 // a.i32 = s.f32
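// In other words the value is forced through a stack slot, roughly
// (the slot address is illustrative):
//   movss xmm0, b
//   movss dword ptr [esp + N], xmm0
//   mov eax, dword ptr [esp + N]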
Jim Stichnothae953202014-12-20 06:17:49 -08002290 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002291 // TODO: Should be able to force a spill setup by calling legalize() with
2292 // Legal_Mem and not Legal_Reg or Legal_Imm.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002293 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002294 SpillVar->setLinkedTo(Dest);
2295 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002296 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002297 _mov(T, Src0RM);
2298 _mov(Spill, T);
2299 _mov(Dest, Spill);
2300 } break;
2301 case IceType_i64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002302 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002303 assert(Src0RM->getType() == IceType_f64);
2304 // a.i64 = bitcast b.f64 ==>
2305 // s.f64 = spill b.f64
2306 // t_lo.i32 = lo(s.f64)
2307 // a_lo.i32 = t_lo.i32
2308 // t_hi.i32 = hi(s.f64)
2309 // a_hi.i32 = t_hi.i32
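// i.e. a movq of b.f64 into the stack slot, followed by two 32-bit loads
// of the slot's low and high halves.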
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002310 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002311 SpillVar->setLinkedTo(llvm::dyn_cast<Variable>(Src0RM));
2312 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002313 Spill->setWeight(RegWeight::Zero);
Jan Voung5cd240d2014-06-25 10:36:46 -07002314 _movq(Spill, Src0RM);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002315
2316 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2317 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2318 Variable *T_Lo = makeReg(IceType_i32);
2319 Variable *T_Hi = makeReg(IceType_i32);
2320 VariableSplit *SpillLo =
2321 VariableSplit::create(Func, Spill, VariableSplit::Low);
2322 VariableSplit *SpillHi =
2323 VariableSplit::create(Func, Spill, VariableSplit::High);
2324
2325 _mov(T_Lo, SpillLo);
2326 _mov(DestLo, T_Lo);
2327 _mov(T_Hi, SpillHi);
2328 _mov(DestHi, T_Hi);
2329 } break;
2330 case IceType_f64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002331 Src0 = legalize(Src0);
2332 assert(Src0->getType() == IceType_i64);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002333 // a.f64 = bitcast b.i64 ==>
2334 // t_lo.i32 = b_lo.i32
Jan Voung1ee34162014-06-24 13:43:30 -07002335 // FakeDef(s.f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002336 // lo(s.f64) = t_lo.i32
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002337 // t_hi.i32 = b_hi.i32
2338 // hi(s.f64) = t_hi.i32
2339 // a.f64 = s.f64
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002340 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002341 SpillVar->setLinkedTo(Dest);
2342 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002343 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002344
Jim Stichnothae953202014-12-20 06:17:49 -08002345 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002346 VariableSplit *SpillLo =
2347 VariableSplit::create(Func, Spill, VariableSplit::Low);
2348 VariableSplit *SpillHi =
2349 VariableSplit::create(Func, Spill, VariableSplit::High);
Jan Voung1ee34162014-06-24 13:43:30 -07002350 _mov(T_Lo, loOperand(Src0));
2351 // Technically, the Spill is defined after the _store happens, but
2352 // SpillLo is considered a "use" of Spill so define Spill before it
2353 // is used.
2354 Context.insert(InstFakeDef::create(Func, Spill));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002355 _store(T_Lo, SpillLo);
Jan Voung1ee34162014-06-24 13:43:30 -07002356 _mov(T_Hi, hiOperand(Src0));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002357 _store(T_Hi, SpillHi);
Jan Voung5cd240d2014-06-25 10:36:46 -07002358 _movq(Dest, Spill);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002359 } break;
Matt Wala83b80362014-07-16 10:21:30 -07002360 case IceType_v8i1: {
2361 assert(Src0->getType() == IceType_i8);
2362 InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002363 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002364 // Arguments to functions are required to be at least 32 bits wide.
2365 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2366 Call->addArg(Src0AsI32);
2367 lowerCall(Call);
2368 } break;
2369 case IceType_v16i1: {
2370 assert(Src0->getType() == IceType_i16);
2371 InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002372 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002373 // Arguments to functions are required to be at least 32 bits wide.
2374 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2375 Call->addArg(Src0AsI32);
2376 lowerCall(Call);
2377 } break;
2378 case IceType_v8i16:
2379 case IceType_v16i8:
2380 case IceType_v4i32:
2381 case IceType_v4f32: {
2382 _movp(Dest, legalizeToVar(Src0));
2383 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002384 }
2385 break;
2386 }
Jan Voung1ee34162014-06-24 13:43:30 -07002387 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002388}
2389
Matt Wala49889232014-07-18 12:45:09 -07002390void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
Matt Walae3777672014-07-31 09:06:17 -07002391 Operand *SourceVectNotLegalized = Inst->getSrc(0);
Jan Voungbc004632014-09-16 15:09:10 -07002392 ConstantInteger32 *ElementIndex =
2393 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
Matt Wala49889232014-07-18 12:45:09 -07002394 // Only constant indices are allowed in PNaCl IR.
2395 assert(ElementIndex);
2396
2397 unsigned Index = ElementIndex->getValue();
Matt Walae3777672014-07-31 09:06:17 -07002398 Type Ty = SourceVectNotLegalized->getType();
Matt Wala49889232014-07-18 12:45:09 -07002399 Type ElementTy = typeElementType(Ty);
2400 Type InVectorElementTy = getInVectorElementType(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002401 Variable *ExtractedElementR = makeReg(InVectorElementTy);
Matt Wala49889232014-07-18 12:45:09 -07002402
2403 // TODO(wala): Determine the best lowering sequences for each type.
Matt Wala0a450512014-07-30 12:44:39 -07002404 bool CanUsePextr =
2405 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2406 if (CanUsePextr && Ty != IceType_v4f32) {
2407 // Use pextrb, pextrw, or pextrd.
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002408 Constant *Mask = Ctx->getConstantInt32(Index);
Matt Walae3777672014-07-31 09:06:17 -07002409 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2410 _pextr(ExtractedElementR, SourceVectR, Mask);
Matt Wala0a450512014-07-30 12:44:39 -07002411 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2412 // Use pshufd and movd/movss.
Jim Stichnothae953202014-12-20 06:17:49 -08002413 Variable *T = nullptr;
Matt Wala49889232014-07-18 12:45:09 -07002414 if (Index) {
2415 // The shuffle only needs to occur if the element to be extracted
2416 // is not at the lowest index.
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002417 Constant *Mask = Ctx->getConstantInt32(Index);
Matt Wala49889232014-07-18 12:45:09 -07002418 T = makeReg(Ty);
Matt Walad4799f42014-08-14 14:24:12 -07002419 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
Matt Wala49889232014-07-18 12:45:09 -07002420 } else {
Matt Walad4799f42014-08-14 14:24:12 -07002421 T = legalizeToVar(SourceVectNotLegalized);
Matt Wala49889232014-07-18 12:45:09 -07002422 }
2423
2424 if (InVectorElementTy == IceType_i32) {
Matt Walae3777672014-07-31 09:06:17 -07002425 _movd(ExtractedElementR, T);
Jan Voung3a569182014-09-29 10:16:01 -07002426 } else { // Ty == IceType_f32
Matt Walacfe51462014-07-25 15:57:56 -07002427 // TODO(wala): _movss is only used here because _mov does not
2428 // allow a vector source and a scalar destination. _mov should be
2429 // able to be used here.
2430 // _movss is a binary instruction, so the FakeDef is needed to
2431 // keep the live range analysis consistent.
Matt Walae3777672014-07-31 09:06:17 -07002432 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2433 _movss(ExtractedElementR, T);
Matt Wala49889232014-07-18 12:45:09 -07002434 }
Matt Wala49889232014-07-18 12:45:09 -07002435 } else {
2436 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2437 // Spill the value to a stack slot and do the extraction in memory.
Matt Wala49889232014-07-18 12:45:09 -07002438 //
Matt Walae3777672014-07-31 09:06:17 -07002439 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
Matt Wala49889232014-07-18 12:45:09 -07002440 // support for legalizing to mem is implemented.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002441 Variable *Slot = Func->makeVariable(Ty);
Matt Wala49889232014-07-18 12:45:09 -07002442 Slot->setWeight(RegWeight::Zero);
Matt Walae3777672014-07-31 09:06:17 -07002443 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
Matt Wala49889232014-07-18 12:45:09 -07002444
2445 // Compute the location of the element in memory.
2446 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2447 OperandX8632Mem *Loc =
2448 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Matt Walae3777672014-07-31 09:06:17 -07002449 _mov(ExtractedElementR, Loc);
Matt Wala49889232014-07-18 12:45:09 -07002450 }
2451
2452 if (ElementTy == IceType_i1) {
2453 // Truncate extracted integers to i1s if necessary.
2454 Variable *T = makeReg(IceType_i1);
2455 InstCast *Cast =
Matt Walae3777672014-07-31 09:06:17 -07002456 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
Matt Wala49889232014-07-18 12:45:09 -07002457 lowerCast(Cast);
Matt Walae3777672014-07-31 09:06:17 -07002458 ExtractedElementR = T;
Matt Wala49889232014-07-18 12:45:09 -07002459 }
2460
2461 // Copy the element to the destination.
2462 Variable *Dest = Inst->getDest();
Matt Walae3777672014-07-31 09:06:17 -07002463 _mov(Dest, ExtractedElementR);
Matt Wala49889232014-07-18 12:45:09 -07002464}
2465
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002466void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
2467 Operand *Src0 = Inst->getSrc(0);
2468 Operand *Src1 = Inst->getSrc(1);
2469 Variable *Dest = Inst->getDest();
Matt Walace0ca8f2014-07-24 12:34:20 -07002470
2471 if (isVectorType(Dest->getType())) {
2472 InstFcmp::FCond Condition = Inst->getCondition();
2473 size_t Index = static_cast<size_t>(Condition);
2474 assert(Index < TableFcmpSize);
2475
2476 if (TableFcmp[Index].SwapVectorOperands) {
2477 Operand *T = Src0;
2478 Src0 = Src1;
2479 Src1 = T;
2480 }
2481
Jim Stichnothae953202014-12-20 06:17:49 -08002482 Variable *T = nullptr;
Matt Walace0ca8f2014-07-24 12:34:20 -07002483
Matt Walae3777672014-07-31 09:06:17 -07002484 if (Condition == InstFcmp::True) {
2485 // makeVectorOfOnes() requires an integer vector type.
Matt Walace0ca8f2014-07-24 12:34:20 -07002486 T = makeVectorOfMinusOnes(IceType_v4i32);
Matt Walae3777672014-07-31 09:06:17 -07002487 } else if (Condition == InstFcmp::False) {
2488 T = makeVectorOfZeros(Dest->getType());
2489 } else {
2490 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2491 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2492
Matt Walae3777672014-07-31 09:06:17 -07002493 switch (Condition) {
2494 default: {
Jan Voungbd385e42014-09-18 18:18:10 -07002495 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
2496 assert(Predicate != CondX86::Cmpps_Invalid);
Matt Walae3777672014-07-31 09:06:17 -07002497 T = makeReg(Src0RM->getType());
2498 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002499 _cmpps(T, Src1RM, Predicate);
Matt Walae3777672014-07-31 09:06:17 -07002500 } break;
2501 case InstFcmp::One: {
2502 // Check both unequal and ordered.
2503 T = makeReg(Src0RM->getType());
2504 Variable *T2 = makeReg(Src0RM->getType());
Matt Walae3777672014-07-31 09:06:17 -07002505 _movp(T, Src0RM);
Jan Voungbd385e42014-09-18 18:18:10 -07002506 _cmpps(T, Src1RM, CondX86::Cmpps_neq);
Matt Walae3777672014-07-31 09:06:17 -07002507 _movp(T2, Src0RM);
Jan Voungbd385e42014-09-18 18:18:10 -07002508 _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
Matt Walae3777672014-07-31 09:06:17 -07002509 _pand(T, T2);
2510 } break;
2511 case InstFcmp::Ueq: {
2512 // Check for equal or unordered.
2513 T = makeReg(Src0RM->getType());
2514 Variable *T2 = makeReg(Src0RM->getType());
Matt Walae3777672014-07-31 09:06:17 -07002515 _movp(T, Src0RM);
Jan Voungbd385e42014-09-18 18:18:10 -07002516 _cmpps(T, Src1RM, CondX86::Cmpps_eq);
Matt Walae3777672014-07-31 09:06:17 -07002517 _movp(T2, Src0RM);
Jan Voungbd385e42014-09-18 18:18:10 -07002518 _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
Matt Walae3777672014-07-31 09:06:17 -07002519 _por(T, T2);
2520 } break;
2521 }
Matt Walae3777672014-07-31 09:06:17 -07002522 }
Matt Walace0ca8f2014-07-24 12:34:20 -07002523
2524 _movp(Dest, T);
2525 eliminateNextVectorSextInstruction(Dest);
2526 return;
2527 }
2528
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002529 // Lowering a = fcmp cond, b, c
2530 // ucomiss b, c /* only if C1 != Br_None */
2531 // /* but swap b,c order if SwapScalarOperands==true */
2532 // mov a, <default>
2533 // j<C1> label /* only if C1 != Br_None */
2534 // j<C2> label /* only if C2 != Br_None */
2535 // FakeUse(a) /* only if C1 != Br_None */
2536 // mov a, !<default> /* only if C1 != Br_None */
2537 // label: /* only if C1 != Br_None */
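// For example, 'fcmp oeq' needs two branches (jne and jp) because ucomiss
// reports an unordered comparison through the parity flag.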
2538 InstFcmp::FCond Condition = Inst->getCondition();
2539 size_t Index = static_cast<size_t>(Condition);
2540 assert(Index < TableFcmpSize);
Matt Walace0ca8f2014-07-24 12:34:20 -07002541 if (TableFcmp[Index].SwapScalarOperands) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002542 Operand *Tmp = Src0;
2543 Src0 = Src1;
2544 Src1 = Tmp;
2545 }
Jan Voungbd385e42014-09-18 18:18:10 -07002546 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
2547 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002548 if (HasC1) {
2549 Src0 = legalize(Src0);
2550 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnothae953202014-12-20 06:17:49 -08002551 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002552 _mov(T, Src0);
2553 _ucomiss(T, Src1RM);
2554 }
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002555 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002556 _mov(Dest, Default);
2557 if (HasC1) {
2558 InstX8632Label *Label = InstX8632Label::create(Func, this);
2559 _br(TableFcmp[Index].C1, Label);
2560 if (HasC2) {
2561 _br(TableFcmp[Index].C2, Label);
2562 }
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002563 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
Jim Stichnoth47752552014-10-13 17:15:08 -07002564 _mov_nonkillable(Dest, NonDefault);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002565 Context.insert(Label);
2566 }
2567}
2568
2569void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2570 Operand *Src0 = legalize(Inst->getSrc(0));
2571 Operand *Src1 = legalize(Inst->getSrc(1));
2572 Variable *Dest = Inst->getDest();
2573
Matt Wala9a0168a2014-07-23 14:56:10 -07002574 if (isVectorType(Dest->getType())) {
2575 Type Ty = Src0->getType();
2576 // Promote i1 vectors to 128 bit integer vector types.
2577 if (typeElementType(Ty) == IceType_i1) {
2578 Type NewTy = IceType_NUM;
2579 switch (Ty) {
2580 default:
2581 llvm_unreachable("unexpected type");
2582 break;
2583 case IceType_v4i1:
2584 NewTy = IceType_v4i32;
2585 break;
2586 case IceType_v8i1:
2587 NewTy = IceType_v8i16;
2588 break;
2589 case IceType_v16i1:
2590 NewTy = IceType_v16i8;
2591 break;
2592 }
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002593 Variable *NewSrc0 = Func->makeVariable(NewTy);
2594 Variable *NewSrc1 = Func->makeVariable(NewTy);
Matt Wala9a0168a2014-07-23 14:56:10 -07002595 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2596 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2597 Src0 = NewSrc0;
2598 Src1 = NewSrc1;
2599 Ty = NewTy;
2600 }
2601
2602 InstIcmp::ICond Condition = Inst->getCondition();
2603
Matt Walae3777672014-07-31 09:06:17 -07002604 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2605 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2606
Matt Wala9a0168a2014-07-23 14:56:10 -07002607 // SSE2 only has signed comparison operations. Transform unsigned
2608 // inputs in a manner that allows for the use of signed comparison
2609 // operations by flipping the high order bits.
2610 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2611 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2612 Variable *T0 = makeReg(Ty);
2613 Variable *T1 = makeReg(Ty);
2614 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002615 _movp(T0, Src0RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002616 _pxor(T0, HighOrderBits);
Matt Walae3777672014-07-31 09:06:17 -07002617 _movp(T1, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002618 _pxor(T1, HighOrderBits);
Matt Walae3777672014-07-31 09:06:17 -07002619 Src0RM = T0;
2620 Src1RM = T1;
Matt Wala9a0168a2014-07-23 14:56:10 -07002621 }
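// E.g. for v4i32, HighOrderBits is <0x80000000 x 4>; XOR-ing each element
// with its sign bit maps the unsigned ordering onto the signed ordering,
// so the signed pcmpgt below yields the unsigned result.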
2622
Matt Wala9a0168a2014-07-23 14:56:10 -07002623 Variable *T = makeReg(Ty);
2624 switch (Condition) {
2625 default:
2626 llvm_unreachable("unexpected condition");
2627 break;
2628 case InstIcmp::Eq: {
Matt Walae3777672014-07-31 09:06:17 -07002629 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002630 _pcmpeq(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002631 } break;
2632 case InstIcmp::Ne: {
Matt Walae3777672014-07-31 09:06:17 -07002633 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002634 _pcmpeq(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002635 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2636 _pxor(T, MinusOne);
2637 } break;
2638 case InstIcmp::Ugt:
2639 case InstIcmp::Sgt: {
Matt Walae3777672014-07-31 09:06:17 -07002640 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002641 _pcmpgt(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002642 } break;
2643 case InstIcmp::Uge:
2644 case InstIcmp::Sge: {
Matt Walae3777672014-07-31 09:06:17 -07002645 // !(Src1RM > Src0RM)
2646 _movp(T, Src1RM);
Matt Walad4799f42014-08-14 14:24:12 -07002647 _pcmpgt(T, Src0RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002648 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2649 _pxor(T, MinusOne);
2650 } break;
2651 case InstIcmp::Ult:
2652 case InstIcmp::Slt: {
Matt Walae3777672014-07-31 09:06:17 -07002653 _movp(T, Src1RM);
Matt Walad4799f42014-08-14 14:24:12 -07002654 _pcmpgt(T, Src0RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002655 } break;
2656 case InstIcmp::Ule:
2657 case InstIcmp::Sle: {
Matt Walae3777672014-07-31 09:06:17 -07002658 // !(Src0RM > Src1RM)
2659 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002660 _pcmpgt(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002661 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2662 _pxor(T, MinusOne);
2663 } break;
2664 }
Matt Wala9a0168a2014-07-23 14:56:10 -07002665
2666 _movp(Dest, T);
Matt Walace0ca8f2014-07-24 12:34:20 -07002667 eliminateNextVectorSextInstruction(Dest);
Matt Wala9a0168a2014-07-23 14:56:10 -07002668 return;
2669 }
2670
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002671 // If Src1 is an immediate, or known to be a physical register, we can
2672 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2673 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2674 // the physical register, but unfortunately we have to commit to one or
2675 // the other before register allocation.)
2676 bool IsSrc1ImmOrReg = false;
2677 if (llvm::isa<Constant>(Src1)) {
2678 IsSrc1ImmOrReg = true;
2679 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
2680 if (Var->hasReg())
2681 IsSrc1ImmOrReg = true;
2682 }
2683
2684 // Try to fuse a compare immediately followed by a conditional branch. This
2685 // is possible when the compare dest is the branch's source operand and the
2686 // branch is its only use. TODO: implement this optimization for i64.
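// E.g. 'a = icmp eq b, c; br a, L1, L2' (with a otherwise unused) becomes
// roughly 'cmp b, c; je L1; jmp L2', with no materialization of a.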
2687 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
2688 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
2689 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07002690 NextBr->setDeleted();
Jim Stichnothad403532014-09-25 12:44:17 -07002691 Operand *Src0RM =
2692 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002693 _cmp(Src0RM, Src1);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002694 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2695 NextBr->getTargetFalse());
2696 // Skip over the following branch instruction.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002697 Context.advanceNext();
2698 return;
2699 }
2700 }
2701
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002702 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
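// E.g. 'a = icmp slt b, c' becomes roughly:
//   cmp b, c
//   mov a, 1
//   jl L1
//   mov a, 0
// L1: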
Matt Wala43ff7eb2014-06-18 10:30:07 -07002703 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002704 Constant *One = Ctx->getConstantInt32(1);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002705 if (Src0->getType() == IceType_i64) {
2706 InstIcmp::ICond Condition = Inst->getCondition();
2707 size_t Index = static_cast<size_t>(Condition);
2708 assert(Index < TableIcmp64Size);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002709 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2710 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002711 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2712 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
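// For eq/ne, the two halves are compared in turn; any mismatch jumps
// straight to the final label with the initial result kept. For the
// relational conditions, the high words are compared first (C1/C2), and
// the low words are compared unsigned (C3) only when the high words are
// equal.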
2713 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2714 InstX8632Label *Label = InstX8632Label::create(Func, this);
2715 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002716 _cmp(Src0LoRM, Src1LoRI);
Jan Voungbd385e42014-09-18 18:18:10 -07002717 _br(CondX86::Br_ne, Label);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002718 _cmp(Src0HiRM, Src1HiRI);
Jan Voungbd385e42014-09-18 18:18:10 -07002719 _br(CondX86::Br_ne, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07002720 _mov_nonkillable(Dest, (Condition == InstIcmp::Eq ? One : Zero));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002721 Context.insert(Label);
2722 } else {
2723 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
2724 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
2725 _mov(Dest, One);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002726 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002727 _br(TableIcmp64[Index].C1, LabelTrue);
2728 _br(TableIcmp64[Index].C2, LabelFalse);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002729 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002730 _br(TableIcmp64[Index].C3, LabelTrue);
2731 Context.insert(LabelFalse);
Jim Stichnoth47752552014-10-13 17:15:08 -07002732 _mov_nonkillable(Dest, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002733 Context.insert(LabelTrue);
2734 }
2735 return;
2736 }
2737
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002738 // cmp b, c
Jim Stichnothad403532014-09-25 12:44:17 -07002739 Operand *Src0RM =
2740 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002741 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002742 _cmp(Src0RM, Src1);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002743 _mov(Dest, One);
2744 _br(getIcmp32Mapping(Inst->getCondition()), Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07002745 _mov_nonkillable(Dest, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002746 Context.insert(Label);
2747}
2748
Matt Wala49889232014-07-18 12:45:09 -07002749void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Matt Walae3777672014-07-31 09:06:17 -07002750 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2751 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
Jan Voungbc004632014-09-16 15:09:10 -07002752 ConstantInteger32 *ElementIndex =
2753 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
Matt Wala49889232014-07-18 12:45:09 -07002754 // Only constant indices are allowed in PNaCl IR.
2755 assert(ElementIndex);
2756 unsigned Index = ElementIndex->getValue();
Matt Walae3777672014-07-31 09:06:17 -07002757 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
Matt Wala49889232014-07-18 12:45:09 -07002758
Matt Walae3777672014-07-31 09:06:17 -07002759 Type Ty = SourceVectNotLegalized->getType();
Matt Wala49889232014-07-18 12:45:09 -07002760 Type ElementTy = typeElementType(Ty);
2761 Type InVectorElementTy = getInVectorElementType(Ty);
2762
2763 if (ElementTy == IceType_i1) {
2764 // Expand the element to the appropriate size for it to be inserted
2765 // in the vector.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002766 Variable *Expanded = Func->makeVariable(InVectorElementTy);
Matt Walae3777672014-07-31 09:06:17 -07002767 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
2768 ElementToInsertNotLegalized);
Matt Wala49889232014-07-18 12:45:09 -07002769 lowerCast(Cast);
Matt Walae3777672014-07-31 09:06:17 -07002770 ElementToInsertNotLegalized = Expanded;
Matt Wala49889232014-07-18 12:45:09 -07002771 }
2772
Matt Wala0a450512014-07-30 12:44:39 -07002773 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
2774 // Use insertps, pinsrb, pinsrw, or pinsrd.
Matt Walae3777672014-07-31 09:06:17 -07002775 Operand *ElementRM =
2776 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2777 Operand *SourceVectRM =
2778 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Matt Wala0a450512014-07-30 12:44:39 -07002779 Variable *T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002780 _movp(T, SourceVectRM);
Matt Wala0a450512014-07-30 12:44:39 -07002781 if (Ty == IceType_v4f32)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002782 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
Matt Wala0a450512014-07-30 12:44:39 -07002783 else
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002784 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
Matt Wala0a450512014-07-30 12:44:39 -07002785 _movp(Inst->getDest(), T);
2786 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2787 // Use shufps or movss.
Jim Stichnothae953202014-12-20 06:17:49 -08002788 Variable *ElementR = nullptr;
Matt Walae3777672014-07-31 09:06:17 -07002789 Operand *SourceVectRM =
2790 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2791
Matt Wala49889232014-07-18 12:45:09 -07002792 if (InVectorElementTy == IceType_f32) {
Matt Walae3777672014-07-31 09:06:17 -07002793 // ElementR will be in an XMM register since it is floating point.
2794 ElementR = legalizeToVar(ElementToInsertNotLegalized);
Matt Wala49889232014-07-18 12:45:09 -07002795 } else {
2796 // Copy an integer to an XMM register.
Matt Walae3777672014-07-31 09:06:17 -07002797 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2798 ElementR = makeReg(Ty);
2799 _movd(ElementR, T);
Matt Wala49889232014-07-18 12:45:09 -07002800 }
2801
Matt Walacfe51462014-07-25 15:57:56 -07002802 if (Index == 0) {
2803 Variable *T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002804 _movp(T, SourceVectRM);
2805 _movss(T, ElementR);
Matt Walacfe51462014-07-25 15:57:56 -07002806 _movp(Inst->getDest(), T);
2807 return;
2808 }
2809
Matt Wala49889232014-07-18 12:45:09 -07002810 // shufps treats the source and destination operands as vectors of
2811 // four doublewords. The destination's two high doublewords are
2812 // selected from the source operand and the two low doublewords are
2813 // selected from (the original value of) the destination operand.
2814 // An insertelement operation can be effected with a sequence of two
2815 // shufps operations with appropriate masks. In all cases below,
2816 // Element[0] is being inserted into SourceVectOperand. Indices are
2817 // ordered from left to right.
2818 //
Matt Walae3777672014-07-31 09:06:17 -07002819 // insertelement into index 1 (result is stored in ElementR):
2820 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
2821 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
Matt Wala49889232014-07-18 12:45:09 -07002822 //
2823 // insertelement into index 2 (result is stored in T):
Matt Walae3777672014-07-31 09:06:17 -07002824 // T := SourceVectRM
2825 // ElementR := ElementR[0, 0] T[0, 3]
2826 // T := T[0, 1] ElementR[0, 3]
Matt Wala49889232014-07-18 12:45:09 -07002827 //
2828 // insertelement into index 3 (result is stored in T):
Matt Walae3777672014-07-31 09:06:17 -07002829 // T := SourceVectRM
2830 // ElementR := ElementR[0, 0] T[0, 2]
2831 // T := T[0, 1] ElementR[3, 0]
Jim Stichnothfac55172014-10-01 13:06:21 -07002832 const unsigned char Mask1[3] = { 0, 192, 128 };
2833 const unsigned char Mask2[3] = { 227, 196, 52 };
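// Each mask byte packs four 2-bit doubleword selectors for shufps, e.g.
// 192 == 0b11000000 and 227 == 0b11100011, realizing the selections
// listed above for Index 1, 2, and 3.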
Matt Wala49889232014-07-18 12:45:09 -07002834
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002835 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
2836 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
Matt Wala49889232014-07-18 12:45:09 -07002837
Matt Walacfe51462014-07-25 15:57:56 -07002838 if (Index == 1) {
Matt Walae3777672014-07-31 09:06:17 -07002839 _shufps(ElementR, SourceVectRM, Mask1Constant);
2840 _shufps(ElementR, SourceVectRM, Mask2Constant);
2841 _movp(Inst->getDest(), ElementR);
Matt Wala49889232014-07-18 12:45:09 -07002842 } else {
2843 Variable *T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002844 _movp(T, SourceVectRM);
2845 _shufps(ElementR, T, Mask1Constant);
2846 _shufps(T, ElementR, Mask2Constant);
Matt Wala49889232014-07-18 12:45:09 -07002847 _movp(Inst->getDest(), T);
2848 }
Matt Wala49889232014-07-18 12:45:09 -07002849 } else {
2850 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2851 // Spill the value to a stack slot and perform the insertion in
2852 // memory.
Matt Wala49889232014-07-18 12:45:09 -07002853 //
Matt Walae3777672014-07-31 09:06:17 -07002854 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
Matt Wala49889232014-07-18 12:45:09 -07002855 // support for legalizing to mem is implemented.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002856 Variable *Slot = Func->makeVariable(Ty);
Matt Wala49889232014-07-18 12:45:09 -07002857 Slot->setWeight(RegWeight::Zero);
Matt Walae3777672014-07-31 09:06:17 -07002858 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
Matt Wala49889232014-07-18 12:45:09 -07002859
2860 // Compute the location of the position to insert in memory.
2861 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2862 OperandX8632Mem *Loc =
2863 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Matt Walae3777672014-07-31 09:06:17 -07002864 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
Matt Wala49889232014-07-18 12:45:09 -07002865
2866 Variable *T = makeReg(Ty);
2867 _movp(T, Slot);
2868 _movp(Inst->getDest(), T);
2869 }
2870}
2871
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002872void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2873 switch (Instr->getIntrinsicInfo().ID) {
Jan Vounga3a01a22014-07-14 10:32:41 -07002874 case Intrinsics::AtomicCmpxchg: {
Jan Voung5cd240d2014-06-25 10:36:46 -07002875 if (!Intrinsics::VerifyMemoryOrder(
Jim Stichnothfac55172014-10-01 13:06:21 -07002876 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002877 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
2878 return;
2879 }
2880 if (!Intrinsics::VerifyMemoryOrder(
Jim Stichnothfac55172014-10-01 13:06:21 -07002881 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002882 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
2883 return;
2884 }
Jan Vounga3a01a22014-07-14 10:32:41 -07002885 Variable *DestPrev = Instr->getDest();
2886 Operand *PtrToMem = Instr->getArg(0);
2887 Operand *Expected = Instr->getArg(1);
2888 Operand *Desired = Instr->getArg(2);
Jan Voungc820ddf2014-07-29 14:38:51 -07002889 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
2890 return;
Jan Vounga3a01a22014-07-14 10:32:41 -07002891 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
Jan Voung5cd240d2014-06-25 10:36:46 -07002892 return;
Jan Vounga3a01a22014-07-14 10:32:41 -07002893 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002894 case Intrinsics::AtomicFence:
Jan Voung5cd240d2014-06-25 10:36:46 -07002895 if (!Intrinsics::VerifyMemoryOrder(
Jim Stichnothfac55172014-10-01 13:06:21 -07002896 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002897 Func->setError("Unexpected memory ordering for AtomicFence");
2898 return;
2899 }
2900 _mfence();
2901 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002902 case Intrinsics::AtomicFenceAll:
Jan Voung5cd240d2014-06-25 10:36:46 -07002903 // NOTE: FenceAll should prevent any load/store from being moved
2904 // across the fence (both atomic and non-atomic). The InstX8632Mfence
2905 // instruction is currently marked coarsely as "HasSideEffects".
2906 _mfence();
2907 return;
2908 case Intrinsics::AtomicIsLockFree: {
2909 // X86 is always lock free for 8/16/32/64 bit accesses.
2910 // TODO(jvoung): Since the result is constant when given a constant
2911 // byte size, this opens up DCE opportunities.
2912 Operand *ByteSize = Instr->getArg(0);
2913 Variable *Dest = Instr->getDest();
Jan Voungbc004632014-09-16 15:09:10 -07002914 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002915 Constant *Result;
2916 switch (CI->getValue()) {
2917 default:
2918 // Some x86-64 processors support the cmpxchg16b instruction, which
2919 // can make 16-byte operations lock free (when used with the LOCK
2920 // prefix). However, that's not supported in 32-bit mode, so just
2921 // return 0 even for large sizes.
2922 Result = Ctx->getConstantZero(IceType_i32);
2923 break;
2924 case 1:
2925 case 2:
2926 case 4:
2927 case 8:
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002928 Result = Ctx->getConstantInt32(1);
Jan Voung5cd240d2014-06-25 10:36:46 -07002929 break;
2930 }
2931 _mov(Dest, Result);
2932 return;
2933 }
2934 // The PNaCl ABI requires the byte size to be a compile-time constant.
2935 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2936 return;
2937 }
2938 case Intrinsics::AtomicLoad: {
2939 // We require the memory address to be naturally aligned.
2940 // Given that, normal loads are atomic.
2941 if (!Intrinsics::VerifyMemoryOrder(
Jim Stichnothfac55172014-10-01 13:06:21 -07002942 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002943 Func->setError("Unexpected memory ordering for AtomicLoad");
2944 return;
2945 }
2946 Variable *Dest = Instr->getDest();
2947 if (Dest->getType() == IceType_i64) {
2948 // Follow what GCC does and use a movq instead of what lowerLoad()
2949 // normally does (split the load into two).
2950 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2951 // can't happen anyway, since this is x86-32 and integer arithmetic only
2952 // happens on 32-bit quantities.
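// i.e. movq the 64-bit value into an xmm register, then let the Bitcast
// below split it into Dest's two 32-bit halves.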
2953 Variable *T = makeReg(IceType_f64);
2954 OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
2955 _movq(T, Addr);
2956 // Then cast the bits back out of the XMM register to the i64 Dest.
2957 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2958 lowerCast(Cast);
Jan Vounge6e497d2014-07-30 10:06:03 -07002959 // Make sure that the atomic load isn't elided when unused.
Jan Voung5cd240d2014-06-25 10:36:46 -07002960 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2961 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2962 return;
2963 }
2964 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2965 lowerLoad(Load);
Jan Vounge6e497d2014-07-30 10:06:03 -07002966 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2967 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2968 // insert the FakeUse on the last-inserted instruction's dest.
Jim Stichnoth6e992142014-07-30 14:45:20 -07002969 Context.insert(
2970 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
Jan Voung5cd240d2014-06-25 10:36:46 -07002971 return;
2972 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07002973 case Intrinsics::AtomicRMW:
Jan Voung5cd240d2014-06-25 10:36:46 -07002974 if (!Intrinsics::VerifyMemoryOrder(
Jim Stichnothfac55172014-10-01 13:06:21 -07002975 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002976 Func->setError("Unexpected memory ordering for AtomicRMW");
2977 return;
2978 }
2979 lowerAtomicRMW(Instr->getDest(),
Jan Voungbc004632014-09-16 15:09:10 -07002980 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
Jim Stichnothfac55172014-10-01 13:06:21 -07002981 Instr->getArg(0))->getValue()),
Jan Voung5cd240d2014-06-25 10:36:46 -07002982 Instr->getArg(1), Instr->getArg(2));
2983 return;
2984 case Intrinsics::AtomicStore: {
2985 if (!Intrinsics::VerifyMemoryOrder(
Jim Stichnothfac55172014-10-01 13:06:21 -07002986 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {
Jan Voung5cd240d2014-06-25 10:36:46 -07002987 Func->setError("Unexpected memory ordering for AtomicStore");
2988 return;
2989 }
2990 // We require the memory address to be naturally aligned.
2991 // Given that, normal stores are atomic.
2992 // Add a fence after the store to make it visible.
2993 Operand *Value = Instr->getArg(0);
2994 Operand *Ptr = Instr->getArg(1);
2995 if (Value->getType() == IceType_i64) {
2996 // Use a movq instead of what lowerStore() normally does
2997 // (split the store into two), following what GCC does.
2998 // Cast the bits from int -> to an xmm register first.
2999 Variable *T = makeReg(IceType_f64);
3000 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3001 lowerCast(Cast);
3002 // Then store XMM w/ a movq.
3003 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
3004 _storeq(T, Addr);
3005 _mfence();
3006 return;
3007 }
3008 InstStore *Store = InstStore::create(Func, Value, Ptr);
3009 lowerStore(Store);
3010 _mfence();
3011 return;
3012 }
Jan Voung7fa813b2014-07-18 13:01:08 -07003013 case Intrinsics::Bswap: {
3014 Variable *Dest = Instr->getDest();
3015 Operand *Val = Instr->getArg(0);
3016 // In 32-bit mode, bswap only works on 32-bit arguments, and the
3017 // argument must be a register. Use rotate left for 16-bit bswap.
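// (Rotating a 16-bit value left by 8 swaps its two bytes, e.g. 0x1234
// becomes 0x3412.)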
3018 if (Val->getType() == IceType_i64) {
3019 Variable *T_Lo = legalizeToVar(loOperand(Val));
3020 Variable *T_Hi = legalizeToVar(hiOperand(Val));
3021 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3022 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3023 _bswap(T_Lo);
3024 _bswap(T_Hi);
3025 _mov(DestLo, T_Hi);
3026 _mov(DestHi, T_Lo);
3027 } else if (Val->getType() == IceType_i32) {
3028 Variable *T = legalizeToVar(Val);
3029 _bswap(T);
3030 _mov(Dest, T);
3031 } else {
3032 assert(Val->getType() == IceType_i16);
3033 Val = legalize(Val);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003034 Constant *Eight = Ctx->getConstantInt16(8);
Jim Stichnothae953202014-12-20 06:17:49 -08003035 Variable *T = nullptr;
Jan Voung7fa813b2014-07-18 13:01:08 -07003036 _mov(T, Val);
3037 _rol(T, Eight);
3038 _mov(Dest, T);
3039 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003040 return;
Jan Voung7fa813b2014-07-18 13:01:08 -07003041 }
Jan Vounge4da26f2014-07-15 17:52:39 -07003042 case Intrinsics::Ctpop: {
3043 Variable *Dest = Instr->getDest();
3044 Operand *Val = Instr->getArg(0);
Jan Voung3a569182014-09-29 10:16:01 -07003045 InstCall *Call =
3046 makeHelperCall(isInt32Asserting32Or64(Val->getType()) ? "__popcountsi2"
3047 : "__popcountdi2",
3048 Dest, 1);
Jan Vounge4da26f2014-07-15 17:52:39 -07003049 Call->addArg(Val);
3050 lowerCall(Call);
3051 // The popcount helpers always return 32-bit values, while the intrinsic's
3052 // signature matches the native POPCNT instruction and fills a 64-bit reg
3053 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
3054 // the user doesn't do that in the IR. If the user does that in the IR,
3055 // then this zeroing instruction is dead and gets optimized out.
3056 if (Val->getType() == IceType_i64) {
3057 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3058 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3059 _mov(DestHi, Zero);
3060 }
3061 return;
3062 }
3063 case Intrinsics::Ctlz: {
3064 // The "is zero undef" parameter is ignored and we always return
3065 // a well-defined value.
3066 Operand *Val = legalize(Instr->getArg(0));
3067 Operand *FirstVal;
Jim Stichnothae953202014-12-20 06:17:49 -08003068 Operand *SecondVal = nullptr;
Jan Vounge4da26f2014-07-15 17:52:39 -07003069 if (Val->getType() == IceType_i64) {
3070 FirstVal = loOperand(Val);
3071 SecondVal = hiOperand(Val);
3072 } else {
3073 FirstVal = Val;
3074 }
3075 const bool IsCttz = false;
3076 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3077 SecondVal);
3078 return;
3079 }
3080 case Intrinsics::Cttz: {
3081 // The "is zero undef" parameter is ignored and we always return
3082 // a well-defined value.
3083 Operand *Val = legalize(Instr->getArg(0));
3084 Operand *FirstVal;
Jim Stichnothae953202014-12-20 06:17:49 -08003085 Operand *SecondVal = nullptr;
Jan Vounge4da26f2014-07-15 17:52:39 -07003086 if (Val->getType() == IceType_i64) {
3087 FirstVal = hiOperand(Val);
3088 SecondVal = loOperand(Val);
3089 } else {
3090 FirstVal = Val;
3091 }
3092 const bool IsCttz = true;
3093 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3094 SecondVal);
3095 return;
3096 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003097 case Intrinsics::Longjmp: {
Jim Stichnothae953202014-12-20 06:17:49 -08003098 InstCall *Call = makeHelperCall("longjmp", nullptr, 2);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003099 Call->addArg(Instr->getArg(0));
3100 Call->addArg(Instr->getArg(1));
3101 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003102 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003103 }
3104 case Intrinsics::Memcpy: {
3105 // In the future, we could potentially emit an inline memcpy/memset, etc.
3106 // for intrinsic calls w/ a known length.
Jim Stichnothae953202014-12-20 06:17:49 -08003107 InstCall *Call = makeHelperCall("memcpy", nullptr, 3);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003108 Call->addArg(Instr->getArg(0));
3109 Call->addArg(Instr->getArg(1));
3110 Call->addArg(Instr->getArg(2));
3111 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003112 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003113 }
3114 case Intrinsics::Memmove: {
Jim Stichnothae953202014-12-20 06:17:49 -08003115 InstCall *Call = makeHelperCall("memmove", nullptr, 3);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003116 Call->addArg(Instr->getArg(0));
3117 Call->addArg(Instr->getArg(1));
3118 Call->addArg(Instr->getArg(2));
3119 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003120 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003121 }
3122 case Intrinsics::Memset: {
3123 // The value operand needs to be extended to a stack slot size
Matt Wala105b7042014-08-11 19:56:19 -07003124 // because the PNaCl ABI requires arguments to be at least 32 bits
3125 // wide.
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003126 Operand *ValOp = Instr->getArg(1);
3127 assert(ValOp->getType() == IceType_i8);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003128 Variable *ValExt = Func->makeVariable(stackSlotType());
Jan Voung957c50d2014-07-21 14:05:29 -07003129 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
Jim Stichnothae953202014-12-20 06:17:49 -08003130 InstCall *Call = makeHelperCall("memset", nullptr, 3);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003131 Call->addArg(Instr->getArg(0));
3132 Call->addArg(ValExt);
3133 Call->addArg(Instr->getArg(2));
3134 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003135 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003136 }
3137 case Intrinsics::NaClReadTP: {
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003138 if (Ctx->getFlags().UseSandboxing) {
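// Under sandboxing the thread pointer is kept at gs:0, so this emits
// roughly 'mov <dest>, gs:[0]'.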
3139 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnothae953202014-12-20 06:17:49 -08003140 Operand *Src =
3141 OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
3142 OperandX8632Mem::SegReg_GS);
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003143 Variable *Dest = Instr->getDest();
Jim Stichnothae953202014-12-20 06:17:49 -08003144 Variable *T = nullptr;
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003145 _mov(T, Src);
3146 _mov(Dest, T);
3147 } else {
3148 InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0);
3149 lowerCall(Call);
3150 }
Jan Voung5cd240d2014-06-25 10:36:46 -07003151 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003152 }
3153 case Intrinsics::Setjmp: {
3154 InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
3155 Call->addArg(Instr->getArg(0));
3156 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003157 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003158 }
Jan Voungf37fbbe2014-07-09 16:13:13 -07003159 case Intrinsics::Sqrt: {
3160 Operand *Src = legalize(Instr->getArg(0));
3161 Variable *Dest = Instr->getDest();
3162 Variable *T = makeReg(Dest->getType());
3163 _sqrtss(T, Src);
3164 _mov(Dest, T);
3165 return;
3166 }
Jan Voung7b34b592014-07-18 13:12:58 -07003167 case Intrinsics::Stacksave: {
Jan Voungbd385e42014-09-18 18:18:10 -07003168 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jan Voung7b34b592014-07-18 13:12:58 -07003169 Variable *Dest = Instr->getDest();
3170 _mov(Dest, esp);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003171 return;
Jan Voung7b34b592014-07-18 13:12:58 -07003172 }
3173 case Intrinsics::Stackrestore: {
Jan Voungbd385e42014-09-18 18:18:10 -07003174 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth47752552014-10-13 17:15:08 -07003175 _mov_nonkillable(esp, Instr->getArg(0));
Jan Voung7b34b592014-07-18 13:12:58 -07003176 return;
3177 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003178 case Intrinsics::Trap:
3179 _ud2();
Jan Voung5cd240d2014-06-25 10:36:46 -07003180 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003181 case Intrinsics::UnknownIntrinsic:
3182 Func->setError("Should not be lowering UnknownIntrinsic");
3183 return;
3184 }
3185 return;
3186}
3187
Jan Vounga3a01a22014-07-14 10:32:41 -07003188void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
3189 Operand *Expected, Operand *Desired) {
3190 if (Expected->getType() == IceType_i64) {
3191 // Reserve the pre-colored registers first, before adding any more
3192 // infinite-weight variables from FormMemoryOperand's legalization.
Jan Voungbd385e42014-09-18 18:18:10 -07003193 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3194 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3195 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3196 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
Jan Vounga3a01a22014-07-14 10:32:41 -07003197 _mov(T_eax, loOperand(Expected));
3198 _mov(T_edx, hiOperand(Expected));
3199 _mov(T_ebx, loOperand(Desired));
3200 _mov(T_ecx, hiOperand(Desired));
3201 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3202 const bool Locked = true;
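// lock cmpxchg8b compares edx:eax with the memory operand; on a match it
// stores ecx:ebx there, otherwise it reloads edx:eax from memory. Either
// way edx:eax ends up with the previous memory contents, which is what
// DestPrev receives below.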
3203 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3204 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3205 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3206 _mov(DestLo, T_eax);
3207 _mov(DestHi, T_edx);
3208 return;
3209 }
Jan Voungbd385e42014-09-18 18:18:10 -07003210 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
Jan Vounga3a01a22014-07-14 10:32:41 -07003211 _mov(T_eax, Expected);
3212 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3213 Variable *DesiredReg = legalizeToVar(Desired);
3214 const bool Locked = true;
3215 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3216 _mov(DestPrev, T_eax);
3217}
3218
Jan Voungc820ddf2014-07-29 14:38:51 -07003219bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
3220 Operand *Expected,
3221 Operand *Desired) {
3222 if (Ctx->getOptLevel() == Opt_m1)
3223 return false;
3224 // Peek ahead a few instructions and see how Dest is used.
3225 // It's very common to have:
3226 //
3227 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3228 // [%y_phi = ...] // list of phi stores
3229 // %p = icmp eq i32 %x, %expected
3230 // br i1 %p, label %l1, label %l2
3231 //
3232 // which we can optimize into:
3233 //
3234 // %x = <cmpxchg code>
3235 // [%y_phi = ...] // list of phi stores
3236 // br eq, %l1, %l2
3237 InstList::iterator I = Context.getCur();
3238 // I is currently the InstIntrinsicCall. Peek past that.
3239 // This assumes that the atomic cmpxchg has not been lowered yet,
3240 // so that the instructions seen in the scan from "Cur" are simple.
3241 assert(llvm::isa<InstIntrinsicCall>(*I));
3242 Inst *NextInst = Context.getNextInst(I);
3243 if (!NextInst)
3244 return false;
3245 // There might be phi assignments right before the compare+branch, since this
3246 // could be a backward branch for a loop. This placement of assignments is
3247 // determined by placePhiStores().
3248 std::vector<InstAssign *> PhiAssigns;
3249 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3250 if (PhiAssign->getDest() == Dest)
3251 return false;
3252 PhiAssigns.push_back(PhiAssign);
3253 NextInst = Context.getNextInst(I);
3254 if (!NextInst)
3255 return false;
3256 }
3257 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3258 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3259 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3260 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3261 return false;
3262 }
3263 NextInst = Context.getNextInst(I);
3264 if (!NextInst)
3265 return false;
3266 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3267 if (!NextBr->isUnconditional() &&
3268 NextCmp->getDest() == NextBr->getCondition() &&
3269 NextBr->isLastUse(NextCmp->getDest())) {
3270 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3271 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3272 // Lower the phi assignments now, before the branch (same placement
3273 // as before).
3274 InstAssign *PhiAssign = PhiAssigns[i];
Jan Voungc820ddf2014-07-29 14:38:51 -07003275 PhiAssign->setDeleted();
Jim Stichnoth89d79562014-08-27 13:50:03 -07003276 lowerAssign(PhiAssign);
Jan Voungc820ddf2014-07-29 14:38:51 -07003277 Context.advanceNext();
3278 }
Jan Voungbd385e42014-09-18 18:18:10 -07003279 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
Jan Voungc820ddf2014-07-29 14:38:51 -07003280 // Skip over the old compare and branch, by deleting them.
3281 NextCmp->setDeleted();
3282 NextBr->setDeleted();
3283 Context.advanceNext();
3284 Context.advanceNext();
3285 return true;
3286 }
3287 }
3288 }
3289 return false;
3290}
3291
Jan Voung5cd240d2014-06-25 10:36:46 -07003292void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3293 Operand *Ptr, Operand *Val) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003294 bool NeedsCmpxchg = false;
Jim Stichnothae953202014-12-20 06:17:49 -08003295 LowerBinOp Op_Lo = nullptr;
3296 LowerBinOp Op_Hi = nullptr;
Jan Voung5cd240d2014-06-25 10:36:46 -07003297 switch (Operation) {
3298 default:
3299 Func->setError("Unknown AtomicRMW operation");
3300 return;
3301 case Intrinsics::AtomicAdd: {
3302 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003303 // All the fall-through paths must set this to true, but use this
3304 // for asserting.
3305 NeedsCmpxchg = true;
3306 Op_Lo = &TargetX8632::_add;
3307 Op_Hi = &TargetX8632::_adc;
3308 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003309 }
3310 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3311 const bool Locked = true;
Jim Stichnothae953202014-12-20 06:17:49 -08003312 Variable *T = nullptr;
Jan Voung5cd240d2014-06-25 10:36:46 -07003313 _mov(T, Val);
3314 _xadd(Addr, T, Locked);
3315 _mov(Dest, T);
3316 return;
3317 }
3318 case Intrinsics::AtomicSub: {
3319 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003320 NeedsCmpxchg = true;
3321 Op_Lo = &TargetX8632::_sub;
3322 Op_Hi = &TargetX8632::_sbb;
3323 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003324 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003325 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3326 const bool Locked = true;
Jim Stichnothae953202014-12-20 06:17:49 -08003327 Variable *T = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003328 _mov(T, Val);
3329 _neg(T);
3330 _xadd(Addr, T, Locked);
3331 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003332 return;
3333 }
3334 case Intrinsics::AtomicOr:
Jan Vounga3a01a22014-07-14 10:32:41 -07003335 // TODO(jvoung): If Dest is null or dead, then some of these
3336 // operations do not need an "exchange", but just a locked op.
3337 // That appears to be "worth" it for sub, or, and, and xor.
3338 // xadd is probably fine vs lock add for add, and xchg is fine
3339 // vs an atomic store.
3340 NeedsCmpxchg = true;
3341 Op_Lo = &TargetX8632::_or;
3342 Op_Hi = &TargetX8632::_or;
3343 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003344 case Intrinsics::AtomicAnd:
Jan Vounga3a01a22014-07-14 10:32:41 -07003345 NeedsCmpxchg = true;
3346 Op_Lo = &TargetX8632::_and;
3347 Op_Hi = &TargetX8632::_and;
3348 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003349 case Intrinsics::AtomicXor:
Jan Vounga3a01a22014-07-14 10:32:41 -07003350 NeedsCmpxchg = true;
3351 Op_Lo = &TargetX8632::_xor;
3352 Op_Hi = &TargetX8632::_xor;
3353 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003354 case Intrinsics::AtomicExchange:
Jan Vounga3a01a22014-07-14 10:32:41 -07003355 if (Dest->getType() == IceType_i64) {
3356 NeedsCmpxchg = true;
3357 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3358 // just need to be moved to the ecx and ebx registers.
Jim Stichnothae953202014-12-20 06:17:49 -08003359 Op_Lo = nullptr;
3360 Op_Hi = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003361 break;
3362 }
3363 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
Jim Stichnothae953202014-12-20 06:17:49 -08003364 Variable *T = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003365 _mov(T, Val);
3366 _xchg(Addr, T);
3367 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003368 return;
3369 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003370 // Otherwise, we need a cmpxchg loop.
Jim Stichnoth6e992142014-07-30 14:45:20 -07003371 (void)NeedsCmpxchg;
Jan Vounga3a01a22014-07-14 10:32:41 -07003372 assert(NeedsCmpxchg);
3373 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3374}
3375
3376void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
3377 Variable *Dest, Operand *Ptr,
3378 Operand *Val) {
3379 // Expand a more complex RMW operation as a cmpxchg loop:
3380 // For 64-bit:
3381 // mov eax, [ptr]
3382 // mov edx, [ptr + 4]
3383 // .LABEL:
3384 // mov ebx, eax
3385 // <Op_Lo> ebx, <desired_adj_lo>
3386 // mov ecx, edx
3387 // <Op_Hi> ecx, <desired_adj_hi>
3388 // lock cmpxchg8b [ptr]
3389 // jne .LABEL
3390 // mov <dest_lo>, eax
3391 // mov <dest_hi>, edx
3392 //
3393 // For 32-bit:
3394 // mov eax, [ptr]
3395 // .LABEL:
3396 // mov <reg>, eax
3397 // op <reg>, [desired_adj]
3398 // lock cmpxchg [ptr], <reg>
3399 // jne .LABEL
3400 // mov <dest>, eax
3401 //
Jim Stichnothae953202014-12-20 06:17:49 -08003402 // If Op_{Lo,Hi} are nullptr, then just copy the value.
Jan Vounga3a01a22014-07-14 10:32:41 -07003403 Val = legalize(Val);
3404 Type Ty = Val->getType();
3405 if (Ty == IceType_i64) {
Jan Voungbd385e42014-09-18 18:18:10 -07003406 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3407 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
Jan Vounga3a01a22014-07-14 10:32:41 -07003408 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
3409 _mov(T_eax, loOperand(Addr));
3410 _mov(T_edx, hiOperand(Addr));
Jan Voungbd385e42014-09-18 18:18:10 -07003411 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3412 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
Jan Vounga3a01a22014-07-14 10:32:41 -07003413 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jim Stichnothae953202014-12-20 06:17:49 -08003414 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003415 if (!IsXchg8b) {
3416 Context.insert(Label);
3417 _mov(T_ebx, T_eax);
3418 (this->*Op_Lo)(T_ebx, loOperand(Val));
3419 _mov(T_ecx, T_edx);
3420 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3421 } else {
3422 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3423 // It just needs the Val loaded into ebx and ecx.
3424 // That can also be done before the loop.
3425 _mov(T_ebx, loOperand(Val));
3426 _mov(T_ecx, hiOperand(Val));
3427 Context.insert(Label);
3428 }
3429 const bool Locked = true;
3430 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
Jan Voungbd385e42014-09-18 18:18:10 -07003431 _br(CondX86::Br_ne, Label);
Jan Vounga3a01a22014-07-14 10:32:41 -07003432 if (!IsXchg8b) {
3433 // If Val is a variable, model the extended live range of Val through
3434 // the end of the loop, since it will be re-used by the loop.
3435 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3436 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3437 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3438 Context.insert(InstFakeUse::create(Func, ValLo));
3439 Context.insert(InstFakeUse::create(Func, ValHi));
3440 }
3441 } else {
3442 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3443 Context.insert(InstFakeUse::create(Func, T_ebx));
3444 Context.insert(InstFakeUse::create(Func, T_ecx));
3445 }
3446 // The address base is also reused in the loop.
3447 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
3448 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3449 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3450 _mov(DestLo, T_eax);
3451 _mov(DestHi, T_edx);
3452 return;
3453 }
3454 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
Jan Voungbd385e42014-09-18 18:18:10 -07003455 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
Jan Vounga3a01a22014-07-14 10:32:41 -07003456 _mov(T_eax, Addr);
3457 InstX8632Label *Label = InstX8632Label::create(Func, this);
3458 Context.insert(Label);
3459 // We want T to be in a different register than eax, so don't use
Jim Stichnothae953202014-12-20 06:17:49 -08003460 // _mov(T == nullptr, T_eax).
Jan Vounga3a01a22014-07-14 10:32:41 -07003461 Variable *T = makeReg(Ty);
3462 _mov(T, T_eax);
3463 (this->*Op_Lo)(T, Val);
3464 const bool Locked = true;
3465 _cmpxchg(Addr, T_eax, T, Locked);
Jan Voungbd385e42014-09-18 18:18:10 -07003466 _br(CondX86::Br_ne, Label);
Jan Vounga3a01a22014-07-14 10:32:41 -07003467 // If Val is a variable, model the extended live range of Val through
3468 // the end of the loop, since it will be re-used by the loop.
3469 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3470 Context.insert(InstFakeUse::create(Func, ValVar));
3471 }
3472 // The address base is also reused in the loop.
3473 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
3474 _mov(Dest, T_eax);
Jan Voung5cd240d2014-06-25 10:36:46 -07003475}
3476
Jan Vounge4da26f2014-07-15 17:52:39 -07003477// Lowers count {trailing, leading} zeros intrinsic.
3478//
3479// We could do constant folding here, but that should have
3480// been done by the front-end/middle-end optimizations.
3481void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3482 Operand *FirstVal, Operand *SecondVal) {
3483 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3484 // Then the instructions will handle the Val == 0 case much more simply
3485 // and won't require conversion from bit position to number of zeros.
3486 //
3487 // Otherwise:
3488 // bsr IF_NOT_ZERO, Val
3489 // mov T_DEST, 63
3490 // cmovne T_DEST, IF_NOT_ZERO
3491 // xor T_DEST, 31
3492 // mov DEST, T_DEST
3493 //
3494 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3495 // register. Also, bsf and bsr require their dest to be a register.
3496 //
3497 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3498 // E.g., for 000... 00001100, bsr will say that the most significant bit
3499 // set is at position 3, while the number of leading zeros is 28. Xor is
3500 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
3501 //
3502 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3503 // are all zero, and compute the result for that case (checking the lower
3504 // 32 bits). Then actually compute the result for the upper bits and
3505 // cmov in the result from the lower computation if the earlier speculation
3506 // was correct.
3507 //
3508 // Cttz is similar, but uses bsf instead, doesn't require the xor
3509 // bit position conversion, and the speculation is reversed.
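  // As a sketch (assuming, per the comment above, that FirstVal is the low
  // word and SecondVal is the high word for ctlz), the 64-bit ctlz path
  // lowers roughly to:
  //   bsr    T, <val_lo>        ; speculate that <val_hi> == 0
  //   mov    T_Dest, 63
  //   cmovne T_Dest, T
  //   xor    T_Dest, 31
  //   add    T_Dest, 32         ; result if the upper half is all zeros
  //   bsr    T_Dest2, <val_hi>
  //   xor    T_Dest2, 31
  //   test   <val_hi>, <val_hi>
  //   cmove  T_Dest2, T_Dest    ; keep the speculative result when <val_hi> == 0
  //   mov    <dest_lo>, T_Dest2
  //   mov    <dest_hi>, 0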
3510 assert(Ty == IceType_i32 || Ty == IceType_i64);
3511 Variable *T = makeReg(IceType_i32);
Matt Wala53c5e602014-07-22 16:39:38 -07003512 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
Jan Vounge4da26f2014-07-15 17:52:39 -07003513 if (Cttz) {
Matt Wala53c5e602014-07-22 16:39:38 -07003514 _bsf(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003515 } else {
Matt Wala53c5e602014-07-22 16:39:38 -07003516 _bsr(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003517 }
3518 Variable *T_Dest = makeReg(IceType_i32);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003519 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3520 Constant *ThirtyOne = Ctx->getConstantInt32(31);
Jan Vounge4da26f2014-07-15 17:52:39 -07003521 if (Cttz) {
3522 _mov(T_Dest, ThirtyTwo);
3523 } else {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003524 Constant *SixtyThree = Ctx->getConstantInt32(63);
Jan Vounge4da26f2014-07-15 17:52:39 -07003525 _mov(T_Dest, SixtyThree);
3526 }
Jan Voungbd385e42014-09-18 18:18:10 -07003527 _cmov(T_Dest, T, CondX86::Br_ne);
Jan Vounge4da26f2014-07-15 17:52:39 -07003528 if (!Cttz) {
3529 _xor(T_Dest, ThirtyOne);
3530 }
3531 if (Ty == IceType_i32) {
3532 _mov(Dest, T_Dest);
3533 return;
3534 }
3535 _add(T_Dest, ThirtyTwo);
3536 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3537 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3538 // Will be using "test" on this, so we need a registerized variable.
3539 Variable *SecondVar = legalizeToVar(SecondVal);
3540 Variable *T_Dest2 = makeReg(IceType_i32);
3541 if (Cttz) {
3542 _bsf(T_Dest2, SecondVar);
3543 } else {
3544 _bsr(T_Dest2, SecondVar);
3545 _xor(T_Dest2, ThirtyOne);
3546 }
3547 _test(SecondVar, SecondVar);
Jan Voungbd385e42014-09-18 18:18:10 -07003548 _cmov(T_Dest2, T_Dest, CondX86::Br_e);
Jan Vounge4da26f2014-07-15 17:52:39 -07003549 _mov(DestLo, T_Dest2);
3550 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3551}
3552
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003553namespace {
3554
3555bool isAdd(const Inst *Inst) {
3556 if (const InstArithmetic *Arith =
3557 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3558 return (Arith->getOp() == InstArithmetic::Add);
3559 }
3560 return false;
3561}
3562
Jim Stichnoth89d79562014-08-27 13:50:03 -07003563void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3564 const Variable *Index, uint16_t Shift, int32_t Offset,
3565 const Inst *Reason) {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08003566 if (!ALLOW_DUMP)
3567 return;
Jim Stichnoth89d79562014-08-27 13:50:03 -07003568 if (!Func->getContext()->isVerbose(IceV_AddrOpt))
3569 return;
3570 Ostream &Str = Func->getContext()->getStrDump();
3571 Str << "Instruction: ";
3572 Reason->dumpDecorated(Func);
3573 Str << " results in Base=";
3574 if (Base)
3575 Base->dump(Func);
3576 else
3577 Str << "<null>";
3578 Str << ", Index=";
3579 if (Index)
3580 Index->dump(Func);
3581 else
3582 Str << "<null>";
3583 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3584}
3585
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003586bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
3587 const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003588 // Var originates from Var=SrcVar ==>
3589 // set Var:=SrcVar
Jim Stichnothae953202014-12-20 06:17:49 -08003590 if (Var == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003591 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003592 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3593 assert(!VMetadata->isMultiDef(Var));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003594 if (llvm::isa<InstAssign>(VarAssign)) {
3595 Operand *SrcOp = VarAssign->getSrc(0);
3596 assert(SrcOp);
3597 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003598 if (!VMetadata->isMultiDef(SrcVar) &&
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003599 // TODO: ensure SrcVar stays single-BB
3600 true) {
3601 Var = SrcVar;
3602 Reason = VarAssign;
3603 return true;
3604 }
3605 }
3606 }
3607 }
3608 return false;
3609}
3610
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003611bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
3612 Variable *&Index, uint16_t &Shift,
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003613 const Inst *&Reason) {
Jim Stichnothae953202014-12-20 06:17:49 -08003614 // Index==nullptr && Base is Base=Var1+Var2 ==>
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003615 // set Base=Var1, Index=Var2, Shift=0
Jim Stichnothae953202014-12-20 06:17:49 -08003616 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003617 return false;
Jim Stichnothae953202014-12-20 06:17:49 -08003618 if (Index != nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003619 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003620 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08003621 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003622 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003623 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003624 if (BaseInst->getSrcSize() < 2)
3625 return false;
3626 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003627 if (VMetadata->isMultiDef(Var1))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003628 return false;
3629 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003630 if (VMetadata->isMultiDef(Var2))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003631 return false;
3632 if (isAdd(BaseInst) &&
3633 // TODO: ensure Var1 and Var2 stay single-BB
3634 true) {
3635 Base = Var1;
3636 Index = Var2;
3637 Shift = 0; // should already have been 0
3638 Reason = BaseInst;
3639 return true;
3640 }
3641 }
3642 }
3643 return false;
3644}
3645
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003646bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
3647 uint16_t &Shift, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003648 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3649 // Index=Var, Shift+=log2(Const)
Jim Stichnothae953202014-12-20 06:17:49 -08003650 if (Index == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003651 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003652 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
Jim Stichnothae953202014-12-20 06:17:49 -08003653 if (IndexInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003654 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003655 assert(!VMetadata->isMultiDef(Index));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003656 if (IndexInst->getSrcSize() < 2)
3657 return false;
3658 if (const InstArithmetic *ArithInst =
3659 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3660 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jan Voungbc004632014-09-16 15:09:10 -07003661 if (ConstantInteger32 *Const =
3662 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003663 if (ArithInst->getOp() == InstArithmetic::Mul &&
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003664 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003665 uint64_t Mult = Const->getValue();
3666 uint32_t LogMult;
3667 switch (Mult) {
3668 case 1:
3669 LogMult = 0;
3670 break;
3671 case 2:
3672 LogMult = 1;
3673 break;
3674 case 4:
3675 LogMult = 2;
3676 break;
3677 case 8:
3678 LogMult = 3;
3679 break;
3680 default:
3681 return false;
3682 }
3683 if (Shift + LogMult <= 3) {
3684 Index = Var;
3685 Shift += LogMult;
3686 Reason = IndexInst;
3687 return true;
3688 }
3689 }
3690 }
3691 }
3692 }
3693 return false;
3694}
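// For example (variable names hypothetical), given:
//   t = i * 4
// with Index == t and Shift == 0, the match rewrites Index := i and
// Shift := 2, i.e. a scale factor of 4 in the final address. Multipliers
// other than 1, 2, 4, or 8, or a combined scale beyond 8 (Shift + LogMult
// greater than 3), are rejected.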
3695
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003696bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
3697 int32_t &Offset, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003698 // Base is Base=Var+Const || Base is Base=Const+Var ==>
3699 // set Base=Var, Offset+=Const
3700 // Base is Base=Var-Const ==>
3701 // set Base=Var, Offset-=Const
Jim Stichnothae953202014-12-20 06:17:49 -08003702 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003703 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003704 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08003705 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003706 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003707 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003708 if (const InstArithmetic *ArithInst =
3709 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3710 if (ArithInst->getOp() != InstArithmetic::Add &&
3711 ArithInst->getOp() != InstArithmetic::Sub)
3712 return false;
3713 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
Jim Stichnothae953202014-12-20 06:17:49 -08003714 Variable *Var = nullptr;
3715 ConstantInteger32 *Const = nullptr;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003716 if (Variable *VariableOperand =
3717 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3718 Var = VariableOperand;
Jan Voungbc004632014-09-16 15:09:10 -07003719 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003720 } else if (IsAdd) {
Jan Voungbc004632014-09-16 15:09:10 -07003721 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003722 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3723 }
Jim Stichnothae953202014-12-20 06:17:49 -08003724 if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003725 return false;
Jan Voungbc004632014-09-16 15:09:10 -07003726 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
Jan Voung8acded02014-09-22 18:02:25 -07003727 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
Jan Voungbc004632014-09-16 15:09:10 -07003728 return false;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003729 Base = Var;
Jan Voungbc004632014-09-16 15:09:10 -07003730 Offset += MoreOffset;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003731 Reason = BaseInst;
3732 return true;
3733 }
3734 return false;
3735}
3736
Jim Stichnoth89d79562014-08-27 13:50:03 -07003737void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
3738 Variable *&Index, uint16_t &Shift, int32_t &Offset) {
Jim Stichnoth800dab22014-09-20 12:25:02 -07003739 Func->resetCurrentNode();
Jim Stichnoth89d79562014-08-27 13:50:03 -07003740 if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
3741 Ostream &Str = Func->getContext()->getStrDump();
3742 Str << "\nStarting computeAddressOpt for instruction:\n ";
3743 Instr->dumpDecorated(Func);
3744 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003745 (void)Offset; // TODO: pattern-match for non-zero offsets.
Jim Stichnothae953202014-12-20 06:17:49 -08003746 if (Base == nullptr)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003747 return;
3748 // If the Base has more than one use or is live across multiple
3749 // blocks, then don't go further. Alternatively (?), never consider
3750 // a transformation that would change a variable that is currently
3751 // *not* live across basic block boundaries into one that *is*.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003752 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003753 return;
3754
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003755 const VariablesMetadata *VMetadata = Func->getVMetadata();
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003756 bool Continue = true;
3757 while (Continue) {
Jim Stichnothae953202014-12-20 06:17:49 -08003758 const Inst *Reason = nullptr;
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003759 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
3760 matchTransitiveAssign(VMetadata, Index, Reason) ||
3761 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
3762 matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
3763 matchOffsetBase(VMetadata, Base, Offset, Reason)) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003764 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
3765 } else {
3766 Continue = false;
Matt Wala8835b892014-08-11 17:46:58 -07003767 }
3768
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003769 // Index is Index=Var<<Const && Const+Shift<=3 ==>
3770 // Index=Var, Shift+=Const
3771
3772 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
3773 // Index=Var, Shift+=log2(Const)
3774
3775 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
3776 // swap(Index,Base)
3777 // Similar for Base=Const*Var and Base=Var<<Const
3778
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003779 // Index is Index=Var+Const ==>
3780 // set Index=Var, Offset+=(Const<<Shift)
3781
3782 // Index is Index=Const+Var ==>
3783 // set Index=Var, Offset+=(Const<<Shift)
3784
3785 // Index is Index=Var-Const ==>
3786 // set Index=Var, Offset-=(Const<<Shift)
3787
3788 // TODO: consider overflow issues with respect to Offset.
3789 // TODO: handle symbolic constants.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003790 }
3791}
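// A sketch of what the matching loop above can fold (variable names
// hypothetical). For a load whose address is t1, where
//   t1 = b + ix
//   ix = i * 4
//   b  = a + 12
// successive matches leave Base = a, Index = i, Shift = 2, Offset = 12,
// so the caller can form the single operand [a + i*4 + 12].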
3792
3793} // anonymous namespace
3794
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003795void TargetX8632::lowerLoad(const InstLoad *Inst) {
3796 // A Load instruction can be treated the same as an Assign
3797 // instruction, after the source operand is transformed into an
3798 // OperandX8632Mem operand. Note that the address mode
3799 // optimization already creates an OperandX8632Mem operand, so it
3800 // doesn't need another level of transformation.
3801 Type Ty = Inst->getDest()->getType();
Jan Voung5cd240d2014-06-25 10:36:46 -07003802 Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003803
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003804 // Fuse this load with a subsequent Arithmetic instruction in the
3805 // following situations:
3806 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
3807 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
3808 //
3809 // TODO: Clean up and test thoroughly.
Jan Voung5cd240d2014-06-25 10:36:46 -07003810 // (E.g., if there is an mfence-all make sure the load ends up on the
3811 // same side of the fence).
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003812 //
3813 // TODO: Why limit to Arithmetic instructions? This could probably be
3814 // applied to most any instruction type. Look at all source operands
3815 // in the following instruction, and if there is one instance of the
3816 // load instruction's dest variable, and that instruction ends that
3817 // variable's live range, then make the substitution. Deal with
3818 // commutativity optimization in the arithmetic instruction lowering.
Jim Stichnothae953202014-12-20 06:17:49 -08003819 InstArithmetic *NewArith = nullptr;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003820 if (InstArithmetic *Arith =
3821 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
3822 Variable *DestLoad = Inst->getDest();
3823 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
3824 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
3825 if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
3826 DestLoad != Src0Arith) {
3827 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
3828 Arith->getSrc(0), Src0);
3829 } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
3830 Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
3831 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
3832 Arith->getSrc(1), Src0);
3833 }
3834 if (NewArith) {
3835 Arith->setDeleted();
3836 Context.advanceNext();
3837 lowerArithmetic(NewArith);
3838 return;
3839 }
3840 }
3841
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003842 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
3843 lowerAssign(Assign);
3844}
3845
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003846void TargetX8632::doAddressOptLoad() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08003847 Inst *Inst = Context.getCur();
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003848 Variable *Dest = Inst->getDest();
3849 Operand *Addr = Inst->getSrc(0);
Jim Stichnothae953202014-12-20 06:17:49 -08003850 Variable *Index = nullptr;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003851 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003852 int32_t Offset = 0; // TODO: make Constant
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003853 // Vanilla ICE load instructions should not use the segment registers,
3854 // and computeAddressOpt only works at the level of Variables and Constants,
3855 // not other OperandX8632Mem, so there should be no mention of segment
3856 // registers there either.
3857 const OperandX8632Mem::SegmentRegisters SegmentReg =
3858 OperandX8632Mem::DefaultSegment;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003859 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jim Stichnoth89d79562014-08-27 13:50:03 -07003860 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003861 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07003862 Inst->setDeleted();
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003863 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003864 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003865 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003866 Context.insert(InstLoad::create(Func, Dest, Addr));
3867 }
3868}
3869
Matt Walac3302742014-08-15 16:21:56 -07003870void TargetX8632::randomlyInsertNop(float Probability) {
3871 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3872 if (RNG.getTrueWithProbability(Probability)) {
Jim Stichnothe6d24782014-12-19 05:42:24 -08003873 _nop(RNG(X86_NUM_NOP_VARIANTS));
Matt Walac3302742014-08-15 16:21:56 -07003874 }
3875}
3876
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003877void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
3878 Func->setError("Phi found in regular instruction list");
3879}
3880
3881void TargetX8632::lowerRet(const InstRet *Inst) {
Jim Stichnothae953202014-12-20 06:17:49 -08003882 Variable *Reg = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003883 if (Inst->hasRetValue()) {
3884 Operand *Src0 = legalize(Inst->getRetValue());
3885 if (Src0->getType() == IceType_i64) {
Jim Stichnothad403532014-09-25 12:44:17 -07003886 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
3887 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003888 Reg = eax;
3889 Context.insert(InstFakeUse::create(Func, edx));
Jan Voung3a569182014-09-29 10:16:01 -07003890 } else if (isScalarFloatingType(Src0->getType())) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003891 _fld(Src0);
Matt Wala928f1292014-07-07 16:50:46 -07003892 } else if (isVectorType(Src0->getType())) {
Jim Stichnothad403532014-09-25 12:44:17 -07003893 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003894 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07003895 _mov(Reg, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003896 }
3897 }
3898 _ret(Reg);
3899 // Add a fake use of esp to make sure esp stays alive for the entire
3900 // function. Otherwise post-call esp adjustments get dead-code
3901 // eliminated. TODO: Are there more places where the fake use
3902 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
3903 // have a ret instruction.
Jan Voungbd385e42014-09-18 18:18:10 -07003904 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003905 Context.insert(InstFakeUse::create(Func, esp));
3906}
3907
3908void TargetX8632::lowerSelect(const InstSelect *Inst) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003909 Variable *Dest = Inst->getDest();
3910 Operand *SrcT = Inst->getTrueOperand();
3911 Operand *SrcF = Inst->getFalseOperand();
Matt Wala9cb61e22014-07-24 09:44:42 -07003912 Operand *Condition = Inst->getCondition();
3913
3914 if (isVectorType(Dest->getType())) {
Matt Wala9cb61e22014-07-24 09:44:42 -07003915 Type SrcTy = SrcT->getType();
3916 Variable *T = makeReg(SrcTy);
Matt Walae3777672014-07-31 09:06:17 -07003917 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3918 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
Matt Wala0a450512014-07-30 12:44:39 -07003919 if (InstructionSet >= SSE4_1) {
3920 // TODO(wala): If the condition operand is a constant, use blendps
3921 // or pblendw.
3922 //
3923 // Use blendvps or pblendvb to implement select.
3924 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3925 SrcTy == IceType_v4f32) {
Matt Walae3777672014-07-31 09:06:17 -07003926 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Jan Voungbd385e42014-09-18 18:18:10 -07003927 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
Matt Walae3777672014-07-31 09:06:17 -07003928 _movp(xmm0, ConditionRM);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003929 _psll(xmm0, Ctx->getConstantInt8(31));
Matt Walae3777672014-07-31 09:06:17 -07003930 _movp(T, SrcFRM);
Matt Walad4799f42014-08-14 14:24:12 -07003931 _blendvps(T, SrcTRM, xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003932 _movp(Dest, T);
3933 } else {
3934 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3935 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
Jim Stichnothfac55172014-10-01 13:06:21 -07003936 : IceType_v16i8;
Jan Voungbd385e42014-09-18 18:18:10 -07003937 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003938 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
Matt Walae3777672014-07-31 09:06:17 -07003939 _movp(T, SrcFRM);
Matt Walad4799f42014-08-14 14:24:12 -07003940 _pblendvb(T, SrcTRM, xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003941 _movp(Dest, T);
3942 }
3943 return;
3944 }
3945 // Lower select without SSE4.1:
3946 // a=d?b:c ==>
3947 // if elementtype(d) != i1:
3948 // d=sext(d);
3949 // a=(b&d)|(c&~d);
Matt Wala9cb61e22014-07-24 09:44:42 -07003950 Variable *T2 = makeReg(SrcTy);
3951 // Sign extend the condition operand if applicable.
3952 if (SrcTy == IceType_v4f32) {
3953 // The sext operation takes only integer arguments.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003954 Variable *T3 = Func->makeVariable(IceType_v4i32);
Matt Wala9cb61e22014-07-24 09:44:42 -07003955 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3956 _movp(T, T3);
3957 } else if (typeElementType(SrcTy) != IceType_i1) {
3958 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3959 } else {
Matt Walae3777672014-07-31 09:06:17 -07003960 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3961 _movp(T, ConditionRM);
Matt Wala9cb61e22014-07-24 09:44:42 -07003962 }
Matt Wala9cb61e22014-07-24 09:44:42 -07003963 _movp(T2, T);
Matt Walad4799f42014-08-14 14:24:12 -07003964 _pand(T, SrcTRM);
3965 _pandn(T2, SrcFRM);
Matt Wala9cb61e22014-07-24 09:44:42 -07003966 _por(T, T2);
3967 _movp(Dest, T);
Matt Wala9cb61e22014-07-24 09:44:42 -07003968
3969 return;
3970 }
3971
3972 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07003973 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Matt Wala43ff7eb2014-06-18 10:30:07 -07003974 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003975 InstX8632Label *Label = InstX8632Label::create(Func, this);
3976
3977 if (Dest->getType() == IceType_i64) {
3978 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3979 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Jim Stichnothad403532014-09-25 12:44:17 -07003980 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
3981 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07003982 _cmp(ConditionRM, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003983 _mov(DestLo, SrcLoRI);
3984 _mov(DestHi, SrcHiRI);
Jan Voungbd385e42014-09-18 18:18:10 -07003985 _br(CondX86::Br_ne, Label);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003986 Operand *SrcFLo = loOperand(SrcF);
3987 Operand *SrcFHi = hiOperand(SrcF);
Jim Stichnothad403532014-09-25 12:44:17 -07003988 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
3989 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
Jim Stichnoth47752552014-10-13 17:15:08 -07003990 _mov_nonkillable(DestLo, SrcLoRI);
3991 _mov_nonkillable(DestHi, SrcHiRI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003992 } else {
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07003993 _cmp(ConditionRM, Zero);
Jim Stichnothad403532014-09-25 12:44:17 -07003994 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003995 _mov(Dest, SrcT);
Jan Voungbd385e42014-09-18 18:18:10 -07003996 _br(CondX86::Br_ne, Label);
Jim Stichnothad403532014-09-25 12:44:17 -07003997 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
Jim Stichnoth47752552014-10-13 17:15:08 -07003998 _mov_nonkillable(Dest, SrcF);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003999 }
4000
4001 Context.insert(Label);
4002}
4003
4004void TargetX8632::lowerStore(const InstStore *Inst) {
4005 Operand *Value = Inst->getData();
4006 Operand *Addr = Inst->getAddr();
Jan Voung5cd240d2014-06-25 10:36:46 -07004007 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
Matt Wala105b7042014-08-11 19:56:19 -07004008 Type Ty = NewAddr->getType();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004009
Matt Wala105b7042014-08-11 19:56:19 -07004010 if (Ty == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004011 Value = legalize(Value);
Jim Stichnothad403532014-09-25 12:44:17 -07004012 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4013 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004014 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4015 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
Matt Wala105b7042014-08-11 19:56:19 -07004016 } else if (isVectorType(Ty)) {
4017 _storep(legalizeToVar(Value), NewAddr);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004018 } else {
Jim Stichnothad403532014-09-25 12:44:17 -07004019 Value = legalize(Value, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004020 _store(Value, NewAddr);
4021 }
4022}
4023
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004024void TargetX8632::doAddressOptStore() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08004025 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004026 Operand *Data = Inst->getData();
4027 Operand *Addr = Inst->getAddr();
Jim Stichnothae953202014-12-20 06:17:49 -08004028 Variable *Index = nullptr;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004029 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004030 int32_t Offset = 0; // TODO: make Constant
4031 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004032 // Vanilla ICE store instructions should not use the segment registers,
4033 // and computeAddressOpt only works at the level of Variables and Constants,
4034 // not other OperandX8632Mem, so there should be no mention of segment
4035 // registers there either.
4036 const OperandX8632Mem::SegmentRegisters SegmentReg =
4037 OperandX8632Mem::DefaultSegment;
Jim Stichnoth89d79562014-08-27 13:50:03 -07004038 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004039 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004040 Inst->setDeleted();
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004041 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004042 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004043 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004044 Context.insert(InstStore::create(Func, Data, Addr));
4045 }
4046}
4047
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004048void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4049 // This implements the most naive possible lowering.
4050 // cmp a,val[0]; je label[0]; cmp a,val[1]; je label[1]; ... jmp default
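  // For i64 comparisons (handled first below), each case expands to a
  // low/high pair of compares, roughly:
  //   cmp a.lo, lo(val[I]); jne .SKIP; cmp a.hi, hi(val[I]); je label[I]; .SKIP: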
4051 Operand *Src0 = Inst->getComparison();
4052 SizeT NumCases = Inst->getNumCases();
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004053 if (Src0->getType() == IceType_i64) {
4054 Src0 = legalize(Src0); // get Base/Index into physical registers
4055 Operand *Src0Lo = loOperand(Src0);
4056 Operand *Src0Hi = hiOperand(Src0);
4057 if (NumCases >= 2) {
4058 Src0Lo = legalizeToVar(Src0Lo);
4059 Src0Hi = legalizeToVar(Src0Hi);
4060 } else {
4061 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4062 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4063 }
4064 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004065 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4066 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004067 InstX8632Label *Label = InstX8632Label::create(Func, this);
4068 _cmp(Src0Lo, ValueLo);
4069 _br(CondX86::Br_ne, Label);
4070 _cmp(Src0Hi, ValueHi);
4071 _br(CondX86::Br_e, Inst->getLabel(I));
4072 Context.insert(Label);
4073 }
4074 _br(Inst->getLabelDefault());
4075 return;
4076 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004077 // OK, we'll be slightly less naive by forcing Src into a physical
4078 // register if there are 2 or more uses.
4079 if (NumCases >= 2)
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004080 Src0 = legalizeToVar(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004081 else
Jim Stichnothad403532014-09-25 12:44:17 -07004082 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004083 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004084 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004085 _cmp(Src0, Value);
Jan Voungbd385e42014-09-18 18:18:10 -07004086 _br(CondX86::Br_e, Inst->getLabel(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004087 }
4088
4089 _br(Inst->getLabelDefault());
4090}
4091
Matt Walaafeaee42014-08-07 13:47:30 -07004092void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4093 Variable *Dest, Operand *Src0,
4094 Operand *Src1) {
4095 assert(isVectorType(Dest->getType()));
4096 Type Ty = Dest->getType();
4097 Type ElementTy = typeElementType(Ty);
4098 SizeT NumElements = typeNumElements(Ty);
4099
4100 Operand *T = Ctx->getConstantUndef(Ty);
4101 for (SizeT I = 0; I < NumElements; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004102 Constant *Index = Ctx->getConstantInt32(I);
Matt Walaafeaee42014-08-07 13:47:30 -07004103
4104 // Extract the next two inputs.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004105 Variable *Op0 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004106 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004107 Variable *Op1 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004108 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4109
4110 // Perform the arithmetic as a scalar operation.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004111 Variable *Res = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004112 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4113
4114 // Insert the result into position.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004115 Variable *DestT = Func->makeVariable(Ty);
Matt Walaafeaee42014-08-07 13:47:30 -07004116 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4117 T = DestT;
Matt Walaafeaee42014-08-07 13:47:30 -07004118 }
4119
4120 lowerAssign(InstAssign::create(Func, Dest, T));
4121}
4122
Matt Walace0ca8f2014-07-24 12:34:20 -07004123// The following pattern occurs often in lowered C and C++ code:
4124//
4125// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4126// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4127//
4128// We can eliminate the sext operation by copying the result of pcmpeqd,
4129 // pcmpgtd, or cmpps (which produce sign-extended results) to the result
4130// of the sext operation.
4131void
4132TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
4133 if (InstCast *NextCast =
4134 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4135 if (NextCast->getCastKind() == InstCast::Sext &&
4136 NextCast->getSrc(0) == SignExtendedResult) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004137 NextCast->setDeleted();
Matt Walace0ca8f2014-07-24 12:34:20 -07004138 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4139 // Skip over the instruction.
Matt Walace0ca8f2014-07-24 12:34:20 -07004140 Context.advanceNext();
4141 }
4142 }
4143}
4144
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004145void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
4146 const SizeT MaxSrcs = 0;
Jim Stichnothae953202014-12-20 06:17:49 -08004147 Variable *Dest = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004148 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
4149 lowerCall(Call);
4150}
4151
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004152// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4153// preserve integrity of liveness analysis. Undef values are also
4154// turned into zeroes, since loOperand() and hiOperand() don't expect
4155// Undef input.
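// For example (a sketch with hypothetical names):
//   %x : i64 = phi [ %a, %B1 ], [ undef, %B2 ]
// becomes
//   %x.lo : i32 = phi [ lo(%a), %B1 ], [ 0, %B2 ]
//   %x.hi : i32 = phi [ hi(%a), %B1 ], [ 0, %B2 ]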
4156void TargetX8632::prelowerPhis() {
4157 CfgNode *Node = Context.getNode();
Jim Stichnoth1502e592014-12-11 09:22:45 -08004158 for (auto I = Node->getPhis().begin(), E = Node->getPhis().end(); I != E;
4159 ++I) {
4160 auto Phi = llvm::dyn_cast<InstPhi>(I);
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004161 if (Phi->isDeleted())
4162 continue;
4163 Variable *Dest = Phi->getDest();
4164 if (Dest->getType() == IceType_i64) {
4165 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4166 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4167 InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
4168 InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
4169 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
4170 Operand *Src = Phi->getSrc(I);
4171 CfgNode *Label = Phi->getLabel(I);
4172 if (llvm::isa<ConstantUndef>(Src))
4173 Src = Ctx->getConstantZero(Dest->getType());
4174 PhiLo->addArgument(loOperand(Src), Label);
4175 PhiHi->addArgument(hiOperand(Src), Label);
4176 }
4177 Node->getPhis().push_back(PhiLo);
4178 Node->getPhis().push_back(PhiHi);
4179 Phi->setDeleted();
4180 }
4181 }
4182}
4183
4184namespace {
4185
4186bool isMemoryOperand(const Operand *Opnd) {
4187 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4188 return !Var->hasReg();
4189 if (llvm::isa<Constant>(Opnd))
4190 return isScalarFloatingType(Opnd->getType());
4191 return true;
4192}
4193
4194} // end of anonymous namespace
4195
4196// Lower the pre-ordered list of assignments into mov instructions.
4197// Also has to do some ad-hoc register allocation as necessary.
4198void TargetX8632::lowerPhiAssignments(CfgNode *Node,
4199 const AssignList &Assignments) {
4200 // Check that this is a properly initialized shell of a node.
4201 assert(Node->getOutEdges().size() == 1);
4202 assert(Node->getInsts().empty());
4203 assert(Node->getPhis().empty());
Jim Stichnothbfb410d2014-11-05 16:04:05 -08004204 CfgNode *Succ = Node->getOutEdges().front();
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004205 getContext().init(Node);
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004206 // Register set setup similar to regAlloc().
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004207 RegSetMask RegInclude = RegSet_All;
4208 RegSetMask RegExclude = RegSet_StackPointer;
4209 if (hasFramePointer())
4210 RegExclude |= RegSet_FramePointer;
4211 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
4212 bool NeedsRegs = false;
4213 // Initialize the set of available registers to the set of what is
4214 // available (not live) at the beginning of the successor block,
4215 // minus all registers used as Dest operands in the Assignments. To
4216 // do this, we start off assuming all registers are available, then
4217 // iterate through the Assignments and remove Dest registers.
4218 // During this iteration, we also determine whether we will actually
4219 // need any extra registers for memory-to-memory copies. If so, we
4220 // do the actual work of removing the live-in registers from the
4221 // set. TODO(stichnot): This work is being repeated for every split
4222 // edge to the successor, so consider updating LiveIn just once
4223 // after all the edges are split.
Jim Stichnoth1502e592014-12-11 09:22:45 -08004224 for (auto I = Assignments.begin(), E = Assignments.end(); I != E; ++I) {
4225 Variable *Dest = I->getDest();
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004226 if (Dest->hasReg()) {
4227 Available[Dest->getRegNum()] = false;
Jim Stichnoth1502e592014-12-11 09:22:45 -08004228 } else if (isMemoryOperand(I->getSrc(0))) {
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004229 NeedsRegs = true; // Src and Dest are both in memory
4230 }
4231 }
4232 if (NeedsRegs) {
4233 LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
4234 for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
4235 Variable *Var = Func->getLiveness()->getVariable(i, Succ);
4236 if (Var->hasReg())
4237 Available[Var->getRegNum()] = false;
4238 }
4239 }
4240 // Iterate backwards through the Assignments. After lowering each
4241 // assignment, add Dest to the set of available registers, and
4242 // remove Src from the set of available registers. Iteration is
4243 // done backwards to enable incremental updates of the available
4244 // register set, and the lowered instruction numbers may be out of
4245 // order, but that can be worked around by renumbering the block
4246 // afterwards if necessary.
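  // When both sides of an assignment are in memory and no register is
  // available, the lowering below spills one register around the copy,
  // roughly (a sketch; eax stands in for whichever register is chosen):
  //   mov [SpillLoc], eax   ; free up eax
  //   mov eax, [Src]
  //   mov [Dest], eax
  //   mov eax, [SpillLoc]   ; restore eax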
4247 for (auto I = Assignments.rbegin(), E = Assignments.rend(); I != E; ++I) {
4248 Context.rewind();
Jim Stichnoth1502e592014-12-11 09:22:45 -08004249 auto Assign = llvm::dyn_cast<InstAssign>(&*I);
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004250 Variable *Dest = Assign->getDest();
4251 Operand *Src = Assign->getSrc(0);
4252 Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
4253 // Use normal assignment lowering, except lower mem=mem specially
4254 // so we can register-allocate at the same time.
4255 if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
4256 lowerAssign(Assign);
4257 } else {
4258 assert(Dest->getType() == Src->getType());
4259 const llvm::SmallBitVector &RegsForType =
4260 getRegisterSetForType(Dest->getType());
4261 llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
Jim Stichnothae953202014-12-20 06:17:49 -08004262 Variable *SpillLoc = nullptr;
4263 Variable *Preg = nullptr;
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004264 // TODO(stichnot): Opportunity for register randomization.
4265 int32_t RegNum = AvailRegsForType.find_first();
4266 bool IsVector = isVectorType(Dest->getType());
4267 bool NeedSpill = (RegNum == -1);
4268 if (NeedSpill) {
4269 // Pick some register to spill and update RegNum.
4270 // TODO(stichnot): Opportunity for register randomization.
4271 RegNum = RegsForType.find_first();
4272 Preg = getPhysicalRegister(RegNum, Dest->getType());
4273 SpillLoc = Func->makeVariable(Dest->getType());
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004274 if (IsVector)
4275 _movp(SpillLoc, Preg);
4276 else
4277 _mov(SpillLoc, Preg);
4278 }
4279 assert(RegNum >= 0);
4280 if (llvm::isa<ConstantUndef>(Src))
4281 // Materialize an actual constant instead of undef. RegNum is
4282 // passed in for vector types because undef vectors are
4283 // lowered to a vector register of zeroes.
4284 Src =
4285 legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
4286 Variable *Tmp = makeReg(Dest->getType(), RegNum);
4287 if (IsVector) {
4288 _movp(Tmp, Src);
4289 _movp(Dest, Tmp);
4290 } else {
4291 _mov(Tmp, Src);
4292 _mov(Dest, Tmp);
4293 }
4294 if (NeedSpill) {
4295 // Restore the spilled register.
4296 if (IsVector)
4297 _movp(Preg, SpillLoc);
4298 else
4299 _mov(Preg, SpillLoc);
4300 }
4301 }
4302 // Update register availability before moving to the previous
4303 // instruction on the Assignments list.
4304 if (Dest->hasReg())
4305 Available[Dest->getRegNum()] = true;
4306 if (SrcVar && SrcVar->hasReg())
4307 Available[SrcVar->getRegNum()] = false;
4308 }
4309
4310 // Add the terminator branch instruction to the end.
Jim Stichnoth5d2fa0c2014-12-01 09:30:55 -08004311 Context.setInsertPoint(Context.getEnd());
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004312 _br(Succ);
4313}
4314
Matt Wala9a0168a2014-07-23 14:56:10 -07004315// There is no support for loading or emitting vector constants, so the
4316// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4317// etc. are initialized with register operations.
4318//
4319// TODO(wala): Add limited support for vector constants so that
4320// complex initialization in registers is unnecessary.
4321
Matt Wala83b80362014-07-16 10:21:30 -07004322Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004323 Variable *Reg = makeReg(Ty, RegNum);
4324 // Insert a FakeDef, since otherwise the live range of Reg might
4325 // be overestimated.
4326 Context.insert(InstFakeDef::create(Func, Reg));
4327 _pxor(Reg, Reg);
4328 return Reg;
4329}
4330
Matt Wala9a0168a2014-07-23 14:56:10 -07004331Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
4332 Variable *MinusOnes = makeReg(Ty, RegNum);
4333 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
4334 Context.insert(InstFakeDef::create(Func, MinusOnes));
4335 _pcmpeq(MinusOnes, MinusOnes);
4336 return MinusOnes;
4337}
4338
Matt Wala83b80362014-07-16 10:21:30 -07004339Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004340 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07004341 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07004342 _psub(Dest, MinusOne);
4343 return Dest;
4344}
4345
Matt Wala9a0168a2014-07-23 14:56:10 -07004346Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4347 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4348 Ty == IceType_v16i8);
4349 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4350 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4351 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004352 _psll(Reg, Ctx->getConstantInt8(Shift));
Matt Wala9a0168a2014-07-23 14:56:10 -07004353 return Reg;
4354 } else {
4355 // SSE has no left shift operation for vectors of 8 bit integers.
4356 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004357 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
Matt Wala9a0168a2014-07-23 14:56:10 -07004358 Variable *Reg = makeReg(Ty, RegNum);
4359 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4360 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4361 return Reg;
4362 }
4363}
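// For the v16i8 path above: movd leaves {0x80808080, 0, 0, 0} in the xmm
// register, and pshufd with a control byte of 0 replicates dword 0 into all
// four lanes, yielding 0x80 (the i8 sign bit) in each of the 16 bytes.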
4364
Matt Wala49889232014-07-18 12:45:09 -07004365OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4366 Variable *Slot,
4367 uint32_t Offset) {
4368 // Ensure that Slot is a stack slot.
4369 assert(Slot->getWeight() == RegWeight::Zero);
4370 assert(Slot->getRegNum() == Variable::NoRegister);
4371 // Compute the location of Slot in memory.
4372 // TODO(wala,stichnot): lea should not be required. The address of
4373 // the stack slot is known at compile time (although not until after
4374 // addProlog()).
4375 const Type PointerType = IceType_i32;
4376 Variable *Loc = makeReg(PointerType);
4377 _lea(Loc, Slot);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004378 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
Matt Wala49889232014-07-18 12:45:09 -07004379 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4380}
4381
Matt Wala928f1292014-07-07 16:50:46 -07004382// Helper for legalize() to emit the right code to lower an operand to a
4383// register of the appropriate type.
4384Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4385 Type Ty = Src->getType();
4386 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07004387 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07004388 _movp(Reg, Src);
4389 } else {
4390 _mov(Reg, Src);
4391 }
4392 return Reg;
4393}
4394
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004395Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
Jim Stichnothad403532014-09-25 12:44:17 -07004396 int32_t RegNum) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004397 // Assert that a physical register is allowed. To date, all calls
4398 // to legalize() allow a physical register. If a physical register
4399 // needs to be explicitly disallowed, then new code will need to be
4400 // written to force a spill.
4401 assert(Allowed & Legal_Reg);
4402 // If we're asking for a specific physical register, make sure we're
4403 // not allowing any other operand kinds. (This could be future
4404 // work, e.g. allow the shl shift amount to be either an immediate
4405 // or in ecx.)
4406 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
4407 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
4408 // Before doing anything with a Mem operand, we need to ensure
4409 // that the Base and Index components are in physical registers.
4410 Variable *Base = Mem->getBase();
4411 Variable *Index = Mem->getIndex();
Jim Stichnothae953202014-12-20 06:17:49 -08004412 Variable *RegBase = nullptr;
4413 Variable *RegIndex = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004414 if (Base) {
Jim Stichnothad403532014-09-25 12:44:17 -07004415 RegBase = legalizeToVar(Base);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004416 }
4417 if (Index) {
Jim Stichnothad403532014-09-25 12:44:17 -07004418 RegIndex = legalizeToVar(Index);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004419 }
4420 if (Base != RegBase || Index != RegIndex) {
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004421 From = OperandX8632Mem::create(
4422 Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
4423 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004424 }
4425
4426 if (!(Allowed & Legal_Mem)) {
Matt Wala928f1292014-07-07 16:50:46 -07004427 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004428 }
4429 return From;
4430 }
4431 if (llvm::isa<Constant>(From)) {
Matt Walad8f4a7d2014-06-18 09:55:03 -07004432 if (llvm::isa<ConstantUndef>(From)) {
4433 // Lower undefs to zero. Another option is to lower undefs to an
4434 // uninitialized register; however, using an uninitialized register
4435 // results in less predictable code.
4436 //
4437 // If in the future the implementation is changed to lower undef
4438 // values to uninitialized registers, a FakeDef will be needed:
4439 // Context.insert(InstFakeDef::create(Func, Reg));
4440 // This is in order to ensure that the live range of Reg is not
4441 // overestimated. If the constant being lowered is a 64 bit value,
4442 // then the result should be split and the lo and hi components will
4443 // need to go in uninitialized registers.
Matt Wala83b80362014-07-16 10:21:30 -07004444 if (isVectorType(From->getType()))
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004445 return makeVectorOfZeros(From->getType(), RegNum);
Matt Wala83b80362014-07-16 10:21:30 -07004446 From = Ctx->getConstantZero(From->getType());
Matt Walad8f4a7d2014-06-18 09:55:03 -07004447 }
Matt Walaad8f7262014-07-14 17:37:37 -07004448 // There should be no constants of vector type (other than undef).
4449 assert(!isVectorType(From->getType()));
Jim Stichnothde4ca712014-06-29 08:13:48 -07004450 bool NeedsReg = false;
4451 if (!(Allowed & Legal_Imm))
4452 // Immediate specifically not allowed
4453 NeedsReg = true;
Jan Voung3a569182014-09-29 10:16:01 -07004454 if (!(Allowed & Legal_Mem) && isScalarFloatingType(From->getType()))
Jim Stichnothde4ca712014-06-29 08:13:48 -07004455 // On x86, FP constants are lowered to mem operands.
4456 NeedsReg = true;
Jan Voung5a13f452014-06-18 10:42:02 -07004457 if (NeedsReg) {
Matt Wala928f1292014-07-07 16:50:46 -07004458 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004459 }
4460 return From;
4461 }
4462 if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
Matt Walaad8f7262014-07-14 17:37:37 -07004463 // Check if the variable is guaranteed a physical register. This
4464 // can happen either when the variable is pre-colored or when it is
4465 // assigned infinite weight.
4466 bool MustHaveRegister =
4467 (Var->hasReg() || Var->getWeight() == RegWeight::Inf);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004468 // We need a new physical register for the operand if:
Jim Stichnothca662e92014-07-10 15:32:36 -07004469 // Mem is not allowed and Var isn't guaranteed a physical
4470 // register, or
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004471 // RegNum is required and Var->getRegNum() doesn't match.
Matt Walaad8f7262014-07-14 17:37:37 -07004472 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004473 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
Jim Stichnothad403532014-09-25 12:44:17 -07004474 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004475 }
4476 return From;
4477 }
4478 llvm_unreachable("Unhandled operand kind in legalize()");
4479 return From;
4480}
4481
4482// Provide a trivial wrapper to legalize() for this common usage.
Jim Stichnothad403532014-09-25 12:44:17 -07004483Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
4484 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004485}
4486
Jan Voung5cd240d2014-06-25 10:36:46 -07004487OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
4488 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4489 // It may be the case that address mode optimization already creates
4490 // an OperandX8632Mem, so in that case it wouldn't need another level
4491 // of transformation.
4492 if (!Mem) {
4493 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4494 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4495 assert(Base || Offset);
Matt Walae3777672014-07-31 09:06:17 -07004496 if (Offset) {
Jan Voungbc004632014-09-16 15:09:10 -07004497 assert(llvm::isa<ConstantInteger32>(Offset) ||
Matt Walae3777672014-07-31 09:06:17 -07004498 llvm::isa<ConstantRelocatable>(Offset));
4499 }
Jan Voung5cd240d2014-06-25 10:36:46 -07004500 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4501 }
4502 return llvm::cast<OperandX8632Mem>(legalize(Mem));
4503}
4504
Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}
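// For example (illustrative only): makeReg(IceType_i32) yields an
// infinite-weight temporary that the register allocator must place in some
// GPR, while makeReg(IceType_i32, RegX8632::Reg_eax) pins the temporary to
// eax for instructions with fixed register requirements.
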
void TargetX8632::postLower() {
  if (Ctx->getOptLevel() == Opt_m1)
    return;
  // Find two-address non-SSA instructions where Dest==Src0, and set
  // the DestNonKillable flag to keep liveness analysis consistent.
  for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {
    if (Inst->isDeleted())
      continue;
    if (Variable *Dest = Inst->getDest()) {
      // TODO(stichnot): We may need to consider all source
      // operands, not just the first one, if using 3-address
      // instructions.
      if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
        Inst->setDestNonKillable();
    }
  }
}
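// Illustrative example (hypothetical variable name): lowering "t = t + 4"
// yields the two-address form "add t, 4", where t is both Dest and Src0.
// Setting DestNonKillable tells live-range construction, roughly, that this
// definition of t does not begin an independent live range -- the
// instruction updates t in place, so its previous value must stay
// associated with the same location through the instruction.
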
void TargetX8632::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  // TODO(stichnot): Declaring Permutation this way loses type/size
  // information.  Fix this in conjunction with the caller-side TODO.
  assert(Permutation.size() >= RegX8632::Reg_NUM);
  // Expected upper bound on the number of registers in a single
  // equivalence class.  For x86-32, this would comprise the 8 XMM
  // registers.  This is for performance, not correctness.
  static const unsigned MaxEquivalenceClassSize = 8;
  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
  EquivalenceClassMap EquivalenceClasses;
  SizeT NumShuffled = 0, NumPreserved = 0;

// Build up the equivalence classes of registers by looking at the
// register properties as well as whether the registers should be
// explicitly excluded from shuffling.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,     \
          frameptr, isI8, isInt, isFP)                                         \
  if (ExcludeRegisters[RegX8632::val]) {                                       \
    /* val stays the same in the resulting permutation. */                     \
    Permutation[RegX8632::val] = RegX8632::val;                                \
    ++NumPreserved;                                                            \
  } else {                                                                     \
    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
                           (isInt << 3) | (isFP << 4);                         \
    /* val is assigned to an equivalence class based on its properties. */     \
    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
  }
  REGX8632_TABLE
#undef X

  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

  // Shuffle the resulting equivalence classes.
  for (auto I : EquivalenceClasses) {
    const RegisterList &List = I.second;
    RegisterList Shuffled(List);
    std::random_shuffle(Shuffled.begin(), Shuffled.end(), RNG);
    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
      Permutation[List[SI]] = Shuffled[SI];
      ++NumShuffled;
    }
  }

  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);

  if (Func->getContext()->isVerbose(IceV_Random)) {
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "Register equivalence classes:\n";
    for (auto I : EquivalenceClasses) {
      Str << "{";
      const RegisterList &List = I.second;
      bool First = true;
      for (int32_t Register : List) {
        if (!First)
          Str << " ";
        First = false;
        Str << getRegName(Register, IceType_i32);
      }
      Str << "}\n";
    }
  }
}
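// Worked example of the class index (the exact bit values come from
// REGX8632_TABLE): a caller-save, 8-bit-capable integer register such as eax
// has scratch=1, isI8=1, isInt=1, giving
// Index = (1 << 0) | (1 << 2) | (1 << 3) = 13, whereas a callee-save
// register such as ebx has preserved=1 instead of scratch=1, giving
// Index = (1 << 1) | (1 << 2) | (1 << 3) = 14.  Registers are therefore only
// ever permuted with registers that are interchangeable from the ABI's and
// the instruction encoder's point of view.
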
template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << "$" << (int32_t)getValue();
}

template <> void ConstantInteger64::emit(GlobalContext *) const {
  llvm_unreachable("Not expecting to emit 64-bit integers");
}

template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << ".L$" << IceType_f32 << "$" << getPoolEntryID();
}

template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << ".L$" << IceType_f64 << "$" << getPoolEntryID();
}

void ConstantUndef::emit(GlobalContext *) const {
  llvm_unreachable("undef value encountered by emitter.");
}
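// For example, a use of the 32-bit integer constant 42 is emitted as the
// immediate "$42", while a float constant with pool entry ID 3 is emitted as
// the label ".L$f32$3", a reference to its slot in the per-type constant
// pool that is emitted separately into a read-only data section.  64-bit
// integer constants never reach the emitter on x86-32, since i64 operations
// are split into 32-bit halves during lowering.
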
TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
    : TargetGlobalInitLowering(Ctx) {}

void TargetGlobalInitX8632::lower(const VariableDeclaration &Var) {
  // TODO(jvoung): handle this without text.
  if (Ctx->getFlags().UseELFWriter)
    return;

  Ostream &Str = Ctx->getStrEmit();

  const VariableDeclaration::InitializerListType &Initializers =
      Var.getInitializers();

  // If external and not initialized, this must be a cross test.
  // Don't generate a declaration for such cases.
  bool IsExternal = Var.isExternal() || Ctx->getFlags().DisableInternal;
  if (IsExternal && !Var.hasInitializer())
    return;

  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  bool IsConstant = Var.getIsConstant();
  uint32_t Align = Var.getAlignment();
  SizeT Size = Var.getNumBytes();
  IceString MangledName = Var.mangleName(Ctx);
  IceString SectionSuffix = "";
  if (Ctx->getFlags().DataSections)
    SectionSuffix = "." + MangledName;

  Str << "\t.type\t" << MangledName << ",@object\n";

  if (IsConstant)
    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
  else if (IsExternal)
    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";
  // No .section for non-constant + zeroinitializer + internal.

  if (IsExternal)
    Str << "\t.globl\t" << MangledName << "\n";
  else if (!IsConstant && !HasNonzeroInitializer)
    Str << "\t.local\t" << MangledName << "\n";
  // Internal symbols only get .local when using .comm.

  if ((IsConstant || HasNonzeroInitializer || IsExternal) && Align > 1)
    Str << "\t.align\t" << Align << "\n";
  // Alignment is part of .comm.

  if (IsConstant || HasNonzeroInitializer || IsExternal)
    Str << MangledName << ":\n";
  else
    Str << "\t.comm\t" << MangledName << "," << Size << "," << Align << "\n";

  if (HasNonzeroInitializer) {
    for (VariableDeclaration::Initializer *Init : Initializers) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
                              ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t.long\t";
        Str << Reloc->getDeclaration()->mangleName(Ctx);
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          // Emit "+ Offset" for non-negative offsets and for INT32_MIN,
          // whose negation is not representable in 32 bits; otherwise emit
          // "- (-Offset)".
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      default: {
        std::string Buffer;
        llvm::raw_string_ostream StrBuf(Buffer);
        StrBuf << "Unable to lower initializer: ";
        Init->dump(StrBuf);
        llvm::report_fatal_error(StrBuf.str());
        break;
      }
      }
    }
  } else if (IsConstant || IsExternal)
    Str << "\t.zero\t" << Size << "\n";
  // Size is part of .comm.

  if (IsConstant || HasNonzeroInitializer || IsExternal)
    Str << "\t.size\t" << MangledName << ", " << Size << "\n";
  // Size is part of .comm.
}
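// Illustrative output sketch (symbol name hypothetical and subject to name
// mangling): for an internal, constant, 4-byte-aligned global "foo" holding
// the bytes 01 00 00 00, with data sections disabled, lower() emits roughly:
//
//   .type foo,@object
//   .section .rodata,"a",@progbits
//   .align 4
//   foo:
//   .byte 1
//   .byte 0
//   .byte 0
//   .byte 0
//   .size foo, 4
//
// A zero-initialized internal (non-constant) global of the same size and
// alignment instead collapses to the .comm form:
//
//   .type foo,@object
//   .local foo
//   .comm foo,4,4
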
} // end of namespace Ice