blob: cd40e096443558d3e03c90067d7cab57e495688a [file] [log] [blame]
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the TargetLoweringX8632 class, which
11// consists almost entirely of the lowering sequence for each
Jim Stichnoth70d0a052014-11-14 15:53:46 -080012// high-level instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070013//
14//===----------------------------------------------------------------------===//
15
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070016#include "llvm/ADT/DenseMap.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/MathExtras.h"
19
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070020#include "IceCfg.h"
21#include "IceCfgNode.h"
Jim Stichnothbfb03e52014-08-26 10:29:05 -070022#include "IceClFlags.h"
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070023#include "IceDefs.h"
Karl Schimpfe3f64d02014-10-07 10:38:22 -070024#include "IceGlobalInits.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070025#include "IceInstX8632.h"
Jim Stichnoth336f6c42014-10-30 15:01:31 -070026#include "IceLiveness.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070027#include "IceOperand.h"
Jan Voungbd385e42014-09-18 18:18:10 -070028#include "IceRegistersX8632.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070029#include "IceTargetLoweringX8632.def"
30#include "IceTargetLoweringX8632.h"
Jan Voung8acded02014-09-22 18:02:25 -070031#include "IceUtils.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070032
33namespace Ice {
34
35namespace {
36
// The following table summarizes the logic for lowering the fcmp
// instruction. There is one table entry for each of the 16 conditions.
//
// The first four columns describe the case when the operands are
// floating point scalar values. A comment in lowerFcmp() describes the
// lowering template. In the most general case, there is a compare
// followed by two conditional branches, because some fcmp conditions
// don't map to a single x86 conditional branch. However, in many cases
// it is possible to swap the operands in the comparison and have a
// single conditional branch. Since it's quite tedious to validate the
// table by hand, good execution tests are helpful.
//
// The last two columns describe the case when the operands are vectors
// of floating point values. For most fcmp conditions, there is a clear
// mapping to a single x86 cmpps instruction variant. Some fcmp
// conditions require special code to handle and these are marked in the
// table with a Cmpps_Invalid predicate.
//
// The entries are generated from FCMPX8632_TABLE via the X() x-macro,
// so the array is indexed by the InstFcmp condition value (validated by
// the static_asserts in namespace dummy1 below).
const struct TableFcmp_ {
  uint32_t Default;        // Result when no branch is taken (scalar case).
  bool SwapScalarOperands; // Swap operands before the scalar compare?
  CondX86::BrCond C1, C2;  // Up to two conditional branches (scalar case).
  bool SwapVectorOperands; // Swap operands before the vector compare?
  CondX86::CmppsCond Predicate; // cmpps predicate for the vector case.
} TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred }              \
  ,
    FCMPX8632_TABLE
#undef X
  };
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
68
// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// x86 conditional branch instruction.
// Indexed by the InstIcmp condition value; kept in sync with the
// high-level table via the static_asserts in namespace dummy2 below.

const struct TableIcmp32_ {
  CondX86::BrCond Mapping; // x86 branch condition for this icmp condition.
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C_32 }                                                            \
  ,
    ICMPX8632_TABLE
#undef X
  };
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
83
// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
// conditional branches are needed. For the other conditions, three separate
// conditional branches are needed.
// Indexed by the InstIcmp condition value; kept in sync with the
// high-level table via the static_asserts in namespace dummy2 below.
const struct TableIcmp64_ {
  CondX86::BrCond C1, C2, C3; // Up to three branch conditions per icmp.
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 }                           \
  ,
    ICMPX8632_TABLE
#undef X
  };
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
98
Jan Voungbd385e42014-09-18 18:18:10 -070099CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700100 size_t Index = static_cast<size_t>(Cond);
101 assert(Index < TableIcmp32Size);
102 return TableIcmp32[Index].Mapping;
103}
104
// Per-IceType attributes for the x86 lowering, generated from
// ICETYPEX8632_TABLE and indexed by the IceType enum value (validated
// by the static_asserts in namespace dummy3 below).
const struct TableTypeX8632Attributes_ {
  Type InVectorElementType; // Element type in the x86 vector representation.
} TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  { elementty }                                                                \
  ,
    ICETYPEX8632_TABLE
#undef X
  };
const size_t TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);
116
117// Return the type which the elements of the vector have in the X86
118// representation of the vector.
119Type getInVectorElementType(Type Ty) {
120 assert(isVectorType(Ty));
121 size_t Index = static_cast<size_t>(Ty);
Jim Stichnoth6e992142014-07-30 14:45:20 -0700122 (void)Index;
Matt Wala49889232014-07-18 12:45:09 -0700123 assert(Index < TableTypeX8632AttributesSize);
124 return TableTypeX8632Attributes[Ty].InVectorElementType;
125}
126
// The maximum number of arguments to pass in XMM registers
const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const uint32_t X86_CHAR_BIT = 8;
// Stack alignment, in bytes
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size of the return address on the stack, in bytes
const uint32_t X86_RET_IP_SIZE_BYTES = 4;
// The base 2 logarithm of the width in bytes of the smallest stack slot
const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
// The base 2 logarithm of the width in bytes of the largest stack slot
const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
// The number of different NOP instructions (used when randomly inserting nops)
const uint32_t X86_NUM_NOP_VARIANTS = 5;
Matt Wala105b7042014-08-11 19:56:19 -0700141
// Value and Alignment are in bytes. Return Value rounded up to the
// next highest multiple of Alignment.
uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
  const uint32_t Mask = Alignment - 1;
  // Alignment must be a power of 2.
  assert((Alignment & Mask) == 0);
  return (Value + Mask) & ~Mask;
}
149
// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment (X86_STACK_ALIGNMENT_BYTES, i.e. 16 bytes).
uint32_t applyStackAlignment(uint32_t Value) {
  return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
Matt Wala45a06232014-07-09 16:33:22 -0700155
// Instruction set options
namespace cl = ::llvm::cl;
// Command-line flag ("-mattr") selecting the x86 instruction set level;
// defaults to SSE2, with SSE4.1 as the only other choice.
cl::opt<TargetX8632::X86InstructionSet>
CLInstructionSet("mattr", cl::desc("X86 target attributes"),
                 cl::init(TargetX8632::SSE2),
                 cl::values(clEnumValN(TargetX8632::SSE2, "sse2",
                                       "Enable SSE2 instructions (default)"),
                            clEnumValN(TargetX8632::SSE4_1, "sse4.1",
                                       "Enable SSE 4.1 instructions"),
                            clEnumValEnd));
Matt Wala0a450512014-07-30 12:44:39 -0700166
// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in FCMPX8632_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries. The enumerators take consecutive values in table order.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries (a missing
// _table2_##tag constant would then fail to compile).
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE;
#undef X
} // end of namespace dummy1
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700207
// Validate the enum values in ICMPX8632_TABLE.
namespace dummy2 {
// Define a temporary set of enum values based on low-level table
// entries. The enumerators take consecutive values in table order.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries (a missing
// _table2_##tag constant would then fail to compile).
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE;
#undef X
} // end of namespace dummy2
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700240
// Validate the enum values in ICETYPEX8632_TABLE.
namespace dummy3 {
// Define a temporary set of enum values based on low-level table
// entries. The enumerators take consecutive values in table order.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
  ICETYPEX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, elts, elty, str)                                   \
  static const int _table1_##tag = tag;
ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  static const int _table2_##tag = _tmp_##tag;                                 \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries (a missing
// _table2_##tag constant would then fail to compile).
#define X(tag, size, align, elts, elty, str)                                   \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE;
#undef X
} // end of namespace dummy3
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700272
273} // end of anonymous namespace
274
// Constructor: record the instruction-set choice from the command line
// and build the per-type register-class masks from REGX8632_TABLE.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), InstructionSet(CLInstructionSet),
      IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
      SpillAreaSizeBytes(0), NextLabelNumber(0) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
  ScratchRegs.resize(RegX8632::Reg_NUM);
  // Populate one bit per register from the register table's isInt/isI8/
  // isFP/scratch columns. Note that FloatRegisters and VectorRegisters
  // are both driven by the isFP column (xmm registers serve both roles).
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[RegX8632::val] = isInt;                                     \
  IntegerRegistersI8[RegX8632::val] = isI8;                                    \
  FloatRegisters[RegX8632::val] = isFP;                                        \
  VectorRegisters[RegX8632::val] = isFP;                                       \
  ScratchRegs[RegX8632::val] = scratch;
  REGX8632_TABLE;
#undef X
  // Map each IceType to the set of registers that can hold it. i1/i8
  // use the 8-bit-capable register set.
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
313
// Run the full (O2) translation pipeline on Func: phi lowering, address
// mode optimization, argument lowering, target lowering, global register
// allocation, stack frame mapping, node reordering, branch optimization,
// and optional nop insertion. Each phase checks Func->hasError() and
// bails out early on failure. Phase ordering is significant.
void TargetX8632::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().PhiEdgeSplit) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().PhiEdgeSplit) {
    // With edge splitting, phis are lowered after register allocation.
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
408
// Run the minimal (Om1) translation pipeline on Func: phi lowering,
// argument lowering, code generation, register allocation restricted to
// infinite-weight variables, stack frame mapping, and optional nop
// insertion. No optimization passes are run. Each phase checks
// Func->hasError() and bails out early on failure.
void TargetX8632::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
445
Jim Stichnothff9c7062014-09-18 04:50:49 -0700446bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
447 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
448 return Br->optimizeBranch(NextNode);
449 }
450 return false;
451}
452
// Default register names (the 32-bit "name" column of REGX8632_TABLE),
// indexed by register number; getRegName() falls back to this table for
// all types other than i1/i8/i16.
IceString TargetX8632::RegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name,
  REGX8632_TABLE
#undef X
};
460
// Return the canonical Variable representing physical register RegNum
// with type Ty, creating and caching it on first request. A void type
// is treated as i32. The per-type cache vector is lazily sized to the
// full register count.
Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == NULL) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered
    // live upon function entry.
    if (RegNum == RegX8632::Reg_esp) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}
481
// Return the assembly name for register RegNum as used for type Ty:
// the 8-bit name for i1/i8, the 16-bit name for i16, and the default
// (32-bit) name for everything else.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegX8632::Reg_NUM);
  // 8-bit register names (the name8 column of REGX8632_TABLE).
  static IceString RegNames8[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
  REGX8632_TABLE
#undef X
  };
  // 16-bit register names (the name16 column of REGX8632_TABLE).
  static IceString RegNames16[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
  REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
508
// Emit Var as an assembly operand: "%reg" if it has a register,
// otherwise a stack reference "offset(%framereg)" (the offset is
// omitted when zero). A register-less infinite-weight variable is a
// register-allocation bug, hence the llvm_unreachable.
void TargetX8632::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  // The frame/stack pointer is always printed with its 32-bit name.
  const Type Ty = IceType_i32;
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    // Without a frame pointer, offsets are esp-relative and must account
    // for intervening stack adjustments (e.g. pushed call arguments).
    Offset += getStackAdjustment();
  if (Offset)
    Str << Offset;
  Str << "(%" << getRegName(getFrameOrStackReg(), Ty) << ")";
}
525
// Build an assembler Address operand for a stack-allocated Variable:
// frame-or-stack register plus the variable's stack offset. Callers
// must not pass a register-resident or infinite-weight variable.
x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    // esp-relative offsets must account for intervening stack adjustments.
    Offset += getStackAdjustment();
  return x86::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
}
536
// Rewrite vector arguments to use their XMM home registers.
void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
       ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register. Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    IceString Name = "home_reg:" + Arg->getName();
    Variable *RegisterArg = Func->makeVariable(Ty, Name);
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    // The original Arg becomes an ordinary variable, defined by the
    // assignment from the register argument below.
    Arg->setIsArg(false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}
568
Matt Walad4799f42014-08-14 14:24:12 -0700569void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
570 // Sort the variables into buckets according to the log of their width
571 // in bytes.
572 const SizeT NumBuckets =
573 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
574 VarList Buckets[NumBuckets];
575
Jim Stichnothf44f3712014-10-01 14:05:51 -0700576 for (Variable *Var : Source) {
Matt Walad4799f42014-08-14 14:24:12 -0700577 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
Jan Voung1eb3a552014-09-12 11:05:47 -0700578 SizeT LogNaturalAlignment = llvm::findFirstSet(NaturalAlignment);
Matt Walad4799f42014-08-14 14:24:12 -0700579 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
580 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
581 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
582 Buckets[BucketIndex].push_back(Var);
583 }
584
585 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
586 VarList &List = Buckets[NumBuckets - I - 1];
587 Dest.insert(Dest.end(), List.begin(), List.end());
588 }
589}
590
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  // Vector arguments on the stack are aligned to the full stack alignment.
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    // The argument was assigned a register: load it from its stack
    // home via an explicit memory operand.
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandX8632Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}
635
Jan Voung3bd9f1a2014-06-18 10:50:57 -0700636Type TargetX8632::stackSlotType() { return IceType_i32; }
637
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700638void TargetX8632::addProlog(CfgNode *Node) {
Matt Walad4799f42014-08-14 14:24:12 -0700639 // Stack frame layout:
640 //
641 // +------------------------+
642 // | 1. return address |
643 // +------------------------+
644 // | 2. preserved registers |
645 // +------------------------+
646 // | 3. padding |
647 // +------------------------+
648 // | 4. global spill area |
649 // +------------------------+
650 // | 5. padding |
651 // +------------------------+
652 // | 6. local spill area |
653 // +------------------------+
654 // | 7. padding |
655 // +------------------------+
656 // | 8. allocas |
657 // +------------------------+
658 //
659 // The following variables record the size in bytes of the given areas:
660 // * X86_RET_IP_SIZE_BYTES: area 1
661 // * PreservedRegsSizeBytes: area 2
662 // * SpillAreaPaddingBytes: area 3
663 // * GlobalsSize: area 4
664 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
665 // * LocalsSpillAreaSize: area 6
666 // * SpillAreaSizeBytes: areas 3 - 7
667
Jim Stichnoth33c80642014-11-11 09:49:04 -0800668 // Make a final pass over the Cfg to determine which variables need
669 // stack slots.
670 llvm::BitVector IsVarReferenced(Func->getNumVariables());
671 for (CfgNode *Node : Func->getNodes()) {
672 for (auto Inst = Node->getInsts().begin(), E = Node->getInsts().end();
673 Inst != E; ++Inst) {
674 if (Inst->isDeleted())
675 continue;
676 if (const Variable *Var = Inst->getDest())
677 IsVarReferenced[Var->getIndex()] = true;
678 for (SizeT I = 0; I < Inst->getSrcSize(); ++I) {
679 Operand *Src = Inst->getSrc(I);
680 SizeT NumVars = Src->getNumVars();
681 for (SizeT J = 0; J < NumVars; ++J) {
682 const Variable *Var = Src->getVar(J);
683 IsVarReferenced[Var->getIndex()] = true;
684 }
685 }
686 }
687 }
688
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700689 // If SimpleCoalescing is false, each variable without a register
690 // gets its own unique stack slot, which leads to large stack
691 // frames. If SimpleCoalescing is true, then each "global" variable
692 // without a register gets its own slot, but "local" variable slots
693 // are reused across basic blocks. E.g., if A and B are local to
Jan Voung44d53e12014-09-11 19:18:03 -0700694 // block 1 and C is local to block 2, then C may share a slot with A or B.
695 //
696 // We cannot coalesce stack slots if this function calls a "returns twice"
697 // function. In that case, basic blocks may be revisited, and variables
698 // local to those basic blocks are actually live until after the
699 // called function returns a second time.
700 const bool SimpleCoalescing = !callsReturnsTwice();
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700701 size_t InArgsSizeBytes = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700702 size_t PreservedRegsSizeBytes = 0;
Matt Walad4799f42014-08-14 14:24:12 -0700703 SpillAreaSizeBytes = 0;
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700704 const VariablesMetadata *VMetadata = Func->getVMetadata();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700705 Context.init(Node);
706 Context.setInsertPoint(Context.getCur());
707
708 // Determine stack frame offsets for each Variable without a
709 // register assignment. This can be done as one variable per stack
710 // slot. Or, do coalescing by running the register allocator again
711 // with an infinite set of registers (as a side effect, this gives
712 // variables a second chance at physical register assignment).
713 //
714 // A middle ground approach is to leverage sparsity and allocate one
715 // block of space on the frame for globals (variables with
716 // multi-block lifetime), and one block to share for locals
717 // (single-block lifetime).
718
719 llvm::SmallBitVector CalleeSaves =
720 getRegisterSet(RegSet_CalleeSave, RegSet_None);
721
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700722 size_t GlobalsSize = 0;
723 std::vector<size_t> LocalsSize(Func->getNumNodes());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700724
725 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
Matt Walad4799f42014-08-14 14:24:12 -0700726 // SpillAreaSizeBytes.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700727 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
728 const VarList &Variables = Func->getVariables();
729 const VarList &Args = Func->getArgs();
Jim Stichnoth800dab22014-09-20 12:25:02 -0700730 VarList SpilledVariables, SortedSpilledVariables, VariablesLinkedToSpillSlots;
Matt Walad4799f42014-08-14 14:24:12 -0700731
732 // If there is a separate locals area, this specifies the alignment
733 // for it.
734 uint32_t LocalsSlotsAlignmentBytes = 0;
735 // The entire spill locations area gets aligned to largest natural
736 // alignment of the variables that have a spill slot.
737 uint32_t SpillAreaAlignmentBytes = 0;
Jim Stichnothf44f3712014-10-01 14:05:51 -0700738 for (Variable *Var : Variables) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700739 if (Var->hasReg()) {
740 RegsUsed[Var->getRegNum()] = true;
741 continue;
742 }
Matt Wala45a06232014-07-09 16:33:22 -0700743 // An argument either does not need a stack slot (if passed in a
744 // register) or already has one (if passed on the stack).
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700745 if (Var->getIsArg())
746 continue;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700747 // An unreferenced variable doesn't need a stack slot.
Jim Stichnoth33c80642014-11-11 09:49:04 -0800748 if (!IsVarReferenced[Var->getIndex()])
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700749 continue;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700750 // A spill slot linked to a variable with a stack slot should reuse
751 // that stack slot.
Jim Stichnoth800dab22014-09-20 12:25:02 -0700752 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
753 assert(Var->getWeight() == RegWeight::Zero);
754 if (!SpillVar->getLinkedTo()->hasReg()) {
755 VariablesLinkedToSpillSlots.push_back(Var);
756 continue;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700757 }
758 }
Matt Walad4799f42014-08-14 14:24:12 -0700759 SpilledVariables.push_back(Var);
760 }
761
762 SortedSpilledVariables.reserve(SpilledVariables.size());
763 sortByAlignment(SortedSpilledVariables, SpilledVariables);
Jim Stichnothf44f3712014-10-01 14:05:51 -0700764 for (Variable *Var : SortedSpilledVariables) {
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700765 size_t Increment = typeWidthInBytesOnStack(Var->getType());
Matt Walad4799f42014-08-14 14:24:12 -0700766 if (!SpillAreaAlignmentBytes)
767 SpillAreaAlignmentBytes = Increment;
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700768 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
769 if (VMetadata->isMultiBlock(Var)) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700770 GlobalsSize += Increment;
771 } else {
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700772 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700773 LocalsSize[NodeIndex] += Increment;
Matt Walad4799f42014-08-14 14:24:12 -0700774 if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
775 SpillAreaSizeBytes = LocalsSize[NodeIndex];
776 if (!LocalsSlotsAlignmentBytes)
777 LocalsSlotsAlignmentBytes = Increment;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700778 }
779 } else {
Matt Walad4799f42014-08-14 14:24:12 -0700780 SpillAreaSizeBytes += Increment;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700781 }
782 }
Matt Walad4799f42014-08-14 14:24:12 -0700783 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
784
785 SpillAreaSizeBytes += GlobalsSize;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700786
787 // Add push instructions for preserved registers.
Jim Stichnoth18735602014-09-16 19:59:35 -0700788 uint32_t NumCallee = 0;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700789 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
790 if (CalleeSaves[i] && RegsUsed[i]) {
Jim Stichnoth18735602014-09-16 19:59:35 -0700791 ++NumCallee;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700792 PreservedRegsSizeBytes += 4;
Jan Voung0b9eee52014-10-07 11:20:10 -0700793 _push(getPhysicalRegister(i));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700794 }
795 }
Jim Stichnoth18735602014-09-16 19:59:35 -0700796 Ctx->statsUpdateRegistersSaved(NumCallee);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700797
798 // Generate "push ebp; mov ebp, esp"
799 if (IsEbpBasedFrame) {
800 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
801 .count() == 0);
802 PreservedRegsSizeBytes += 4;
Jan Voungbd385e42014-09-18 18:18:10 -0700803 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
804 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jan Voung0b9eee52014-10-07 11:20:10 -0700805 _push(ebp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700806 _mov(ebp, esp);
807 }
808
Matt Walad4799f42014-08-14 14:24:12 -0700809 // Align the variables area. SpillAreaPaddingBytes is the size of
810 // the region after the preserved registers and before the spill
811 // areas.
812 uint32_t SpillAreaPaddingBytes = 0;
813 if (SpillAreaAlignmentBytes) {
814 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
815 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
816 uint32_t SpillAreaStart =
817 applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
818 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
819 SpillAreaSizeBytes += SpillAreaPaddingBytes;
Matt Wala105b7042014-08-11 19:56:19 -0700820 }
821
Matt Walad4799f42014-08-14 14:24:12 -0700822 // If there are separate globals and locals areas, make sure the
823 // locals area is aligned by padding the end of the globals area.
824 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
825 if (LocalsSlotsAlignmentBytes) {
826 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
827 GlobalsAndSubsequentPaddingSize =
828 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
829 SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
830 }
831
832 // Align esp if necessary.
833 if (NeedsStackAlignment) {
834 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
835 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
836 SpillAreaSizeBytes = StackSize - StackOffset;
837 }
838
839 // Generate "sub esp, SpillAreaSizeBytes"
840 if (SpillAreaSizeBytes)
Jan Voungbd385e42014-09-18 18:18:10 -0700841 _sub(getPhysicalRegister(RegX8632::Reg_esp),
Jan Voungbc004632014-09-16 15:09:10 -0700842 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
Jim Stichnoth18735602014-09-16 19:59:35 -0700843 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700844
845 resetStackAdjustment();
846
Matt Wala45a06232014-07-09 16:33:22 -0700847 // Fill in stack offsets for stack args, and copy args into registers
848 // for those that were register-allocated. Args are pushed right to
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700849 // left, so Arg[0] is closest to the stack/frame pointer.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700850 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
Matt Wala105b7042014-08-11 19:56:19 -0700851 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700852 if (!IsEbpBasedFrame)
Matt Walad4799f42014-08-14 14:24:12 -0700853 BasicFrameOffset += SpillAreaSizeBytes;
Matt Wala45a06232014-07-09 16:33:22 -0700854
855 unsigned NumXmmArgs = 0;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700856 for (SizeT i = 0; i < Args.size(); ++i) {
857 Variable *Arg = Args[i];
Matt Wala45a06232014-07-09 16:33:22 -0700858 // Skip arguments passed in registers.
859 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
860 ++NumXmmArgs;
861 continue;
862 }
863 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700864 }
865
866 // Fill in stack offsets for locals.
Matt Walad4799f42014-08-14 14:24:12 -0700867 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700868 LocalsSize.assign(LocalsSize.size(), 0);
Matt Walad4799f42014-08-14 14:24:12 -0700869 size_t NextStackOffset = GlobalsSpaceUsed;
Jim Stichnothf44f3712014-10-01 14:05:51 -0700870 for (Variable *Var : SortedSpilledVariables) {
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700871 size_t Increment = typeWidthInBytesOnStack(Var->getType());
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700872 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
873 if (VMetadata->isMultiBlock(Var)) {
Matt Walad4799f42014-08-14 14:24:12 -0700874 GlobalsSpaceUsed += Increment;
875 NextStackOffset = GlobalsSpaceUsed;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700876 } else {
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700877 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700878 LocalsSize[NodeIndex] += Increment;
Matt Walad4799f42014-08-14 14:24:12 -0700879 NextStackOffset = SpillAreaPaddingBytes +
880 GlobalsAndSubsequentPaddingSize +
881 LocalsSize[NodeIndex];
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700882 }
883 } else {
884 NextStackOffset += Increment;
885 }
886 if (IsEbpBasedFrame)
887 Var->setStackOffset(-NextStackOffset);
888 else
Matt Walad4799f42014-08-14 14:24:12 -0700889 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700890 }
Matt Walad4799f42014-08-14 14:24:12 -0700891 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700892 this->HasComputedFrame = true;
893
Matt Walad4799f42014-08-14 14:24:12 -0700894 // Assign stack offsets to variables that have been linked to spilled
895 // variables.
Jim Stichnothf44f3712014-10-01 14:05:51 -0700896 for (Variable *Var : VariablesLinkedToSpillSlots) {
Jim Stichnoth800dab22014-09-20 12:25:02 -0700897 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
Matt Walad4799f42014-08-14 14:24:12 -0700898 Var->setStackOffset(Linked->getStackOffset());
899 }
900
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700901 if (Func->getContext()->isVerbose(IceV_Frame)) {
Matt Walad4799f42014-08-14 14:24:12 -0700902 Ostream &Str = Func->getContext()->getStrDump();
903
904 Str << "Stack layout:\n";
905 uint32_t EspAdjustmentPaddingSize =
906 SpillAreaSizeBytes - LocalsSpillAreaSize -
907 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
908 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
909 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
910 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
911 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
912 << " globals spill area = " << GlobalsSize << " bytes\n"
913 << " globals-locals spill areas intermediate padding = "
914 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
915 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
916 << " esp alignment padding = " << EspAdjustmentPaddingSize
917 << " bytes\n";
918
919 Str << "Stack details:\n"
920 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
921 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
922 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
923 << " bytes\n"
924 << " is ebp based = " << IsEbpBasedFrame << "\n";
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700925 }
926}
927
928void TargetX8632::addEpilog(CfgNode *Node) {
929 InstList &Insts = Node->getInsts();
930 InstList::reverse_iterator RI, E;
Jim Stichnothf44f3712014-10-01 14:05:51 -0700931 // TODO(stichnot): Use llvm::make_range with LLVM 3.5.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700932 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
933 if (llvm::isa<InstX8632Ret>(*RI))
934 break;
935 }
936 if (RI == E)
937 return;
938
939 // Convert the reverse_iterator position into its corresponding
940 // (forward) iterator position.
941 InstList::iterator InsertPoint = RI.base();
942 --InsertPoint;
943 Context.init(Node);
944 Context.setInsertPoint(InsertPoint);
945
Jan Voungbd385e42014-09-18 18:18:10 -0700946 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700947 if (IsEbpBasedFrame) {
Jan Voungbd385e42014-09-18 18:18:10 -0700948 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700949 _mov(esp, ebp);
950 _pop(ebp);
951 } else {
Matt Walad4799f42014-08-14 14:24:12 -0700952 // add esp, SpillAreaSizeBytes
953 if (SpillAreaSizeBytes)
Jan Voungbc004632014-09-16 15:09:10 -0700954 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700955 }
956
957 // Add pop instructions for preserved registers.
958 llvm::SmallBitVector CalleeSaves =
959 getRegisterSet(RegSet_CalleeSave, RegSet_None);
960 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
961 SizeT j = CalleeSaves.size() - i - 1;
Jan Voungbd385e42014-09-18 18:18:10 -0700962 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700963 continue;
964 if (CalleeSaves[j] && RegsUsed[j]) {
965 _pop(getPhysicalRegister(j));
966 }
967 }
968}
969
// Traits classes that give emitConstantPool() everything it needs to
// emit the constant pool for one floating-point type: the in-memory
// primitive type, a same-width integer type for bit-level printing, the
// Subzero constant class, and the assembler tag / printf format used
// when emitting each entry.
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  // 32-bit integer type wide enough to hold a float's bit pattern.
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  // 64-bit integer type wide enough to hold a double's bit pattern.
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
997
998template <typename T> void TargetX8632::emitConstantPool() const {
999 Ostream &Str = Ctx->getStrEmit();
1000 Type Ty = T::Ty;
1001 SizeT Align = typeAlignInBytes(Ty);
1002 ConstantList Pool = Ctx->getConstantPool(Ty);
1003
1004 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
1005 << "\n";
1006 Str << "\t.align\t" << Align << "\n";
Jim Stichnothf44f3712014-10-01 14:05:51 -07001007 for (Constant *C : Pool) {
1008 typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
Jim Stichnothf61d5b22014-05-23 13:31:24 -07001009 typename T::PrimitiveFpType Value = Const->getValue();
1010 // Use memcpy() to copy bits from Value into RawValue in a way
1011 // that avoids breaking strict-aliasing rules.
1012 typename T::PrimitiveIntType RawValue;
1013 memcpy(&RawValue, &Value, sizeof(Value));
1014 char buf[30];
1015 int CharsPrinted =
1016 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
1017 assert(CharsPrinted >= 0 &&
1018 (size_t)CharsPrinted < llvm::array_lengthof(buf));
1019 (void)CharsPrinted; // avoid warnings if asserts are disabled
Jim Stichnothbca2f652014-11-01 10:13:54 -07001020 Str << ".L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
Jim Stichnothf61d5b22014-05-23 13:31:24 -07001021 Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
1022 << Value << "\n";
1023 }
1024}
1025
// Emits all floating-point constant pools for the translation unit.
void TargetX8632::emitConstants() const {
  emitConstantPool<PoolTypeConverter<float> >();
  emitConstantPool<PoolTypeConverter<double> >();

  // No need to emit constants from the int pool since (for x86) they
  // are embedded as immediates in the instructions.
}
1033
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001034void TargetX8632::split64(Variable *Var) {
1035 switch (Var->getType()) {
1036 default:
1037 return;
1038 case IceType_i64:
1039 // TODO: Only consider F64 if we need to push each half when
1040 // passing as an argument to a function call. Note that each half
1041 // is still typed as I32.
1042 case IceType_f64:
1043 break;
1044 }
1045 Variable *Lo = Var->getLo();
1046 Variable *Hi = Var->getHi();
1047 if (Lo) {
1048 assert(Hi);
1049 return;
1050 }
1051 assert(Hi == NULL);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07001052 Lo = Func->makeVariable(IceType_i32, Var->getName() + "__lo");
1053 Hi = Func->makeVariable(IceType_i32, Var->getName() + "__hi");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001054 Var->setLoHi(Lo, Hi);
1055 if (Var->getIsArg()) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07001056 Lo->setIsArg();
1057 Hi->setIsArg();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001058 }
1059}
1060
1061Operand *TargetX8632::loOperand(Operand *Operand) {
1062 assert(Operand->getType() == IceType_i64);
1063 if (Operand->getType() != IceType_i64)
1064 return Operand;
1065 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1066 split64(Var);
1067 return Var->getLo();
1068 }
Jan Voungbc004632014-09-16 15:09:10 -07001069 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1070 return Ctx->getConstantInt32(IceType_i32,
1071 static_cast<uint32_t>(Const->getValue()));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001072 }
1073 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1074 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1075 Mem->getOffset(), Mem->getIndex(),
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001076 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001077 }
1078 llvm_unreachable("Unsupported operand type");
1079 return NULL;
1080}
1081
1082Operand *TargetX8632::hiOperand(Operand *Operand) {
1083 assert(Operand->getType() == IceType_i64);
1084 if (Operand->getType() != IceType_i64)
1085 return Operand;
1086 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1087 split64(Var);
1088 return Var->getHi();
1089 }
Jan Voungbc004632014-09-16 15:09:10 -07001090 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1091 return Ctx->getConstantInt32(
1092 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001093 }
1094 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1095 Constant *Offset = Mem->getOffset();
1096 if (Offset == NULL)
Jan Voungbc004632014-09-16 15:09:10 -07001097 Offset = Ctx->getConstantInt32(IceType_i32, 4);
1098 else if (ConstantInteger32 *IntOffset =
1099 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1100 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001101 } else if (ConstantRelocatable *SymOffset =
1102 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
Jan Voungfe14fb82014-10-13 15:56:32 -07001103 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001104 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
1105 SymOffset->getName());
1106 }
1107 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001108 Mem->getIndex(), Mem->getShift(),
1109 Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001110 }
1111 llvm_unreachable("Unsupported operand type");
1112 return NULL;
1113}
1114
// Builds the set of physical registers selected by Include and then
// filtered by Exclude.  The X macro below expands once per register in
// REGX8632_TABLE; all Exclude tests run after the Include tests in each
// expansion, so exclusion wins when both masks name the same class.
// (No comments may be placed inside the macro body — they would break
// the backslash line continuations.)
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
1144
1145void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
1146 IsEbpBasedFrame = true;
Matt Wala105b7042014-08-11 19:56:19 -07001147 // Conservatively require the stack to be aligned. Some stack
1148 // adjustment operations implemented below assume that the stack is
1149 // aligned before the alloca. All the alloca code ensures that the
1150 // stack alignment is preserved after the alloca. The stack alignment
1151 // restriction can be relaxed in some cases.
1152 NeedsStackAlignment = true;
1153
1154 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
Jan Voungbd385e42014-09-18 18:18:10 -07001155 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001156 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1157 Variable *Dest = Inst->getDest();
Matt Wala105b7042014-08-11 19:56:19 -07001158 uint32_t AlignmentParam = Inst->getAlignInBytes();
Jim Stichnoth72a8f8d2014-09-08 17:56:50 -07001159 // For default align=0, set it to the real value 1, to avoid any
1160 // bit-manipulation problems below.
1161 AlignmentParam = std::max(AlignmentParam, 1u);
Matt Wala105b7042014-08-11 19:56:19 -07001162
1163 // LLVM enforces power of 2 alignment.
1164 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1165 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1166
1167 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1168 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
Jan Voungbc004632014-09-16 15:09:10 -07001169 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001170 }
Jan Voungbc004632014-09-16 15:09:10 -07001171 if (ConstantInteger32 *ConstantTotalSize =
1172 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
Matt Wala105b7042014-08-11 19:56:19 -07001173 uint32_t Value = ConstantTotalSize->getValue();
Matt Walad4799f42014-08-14 14:24:12 -07001174 Value = applyAlignment(Value, Alignment);
Jan Voungbc004632014-09-16 15:09:10 -07001175 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value));
Matt Wala105b7042014-08-11 19:56:19 -07001176 } else {
1177 // Non-constant sizes need to be adjusted to the next highest
1178 // multiple of the required alignment at runtime.
1179 Variable *T = makeReg(IceType_i32);
1180 _mov(T, TotalSize);
Jan Voungbc004632014-09-16 15:09:10 -07001181 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1));
1182 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001183 _sub(esp, T);
1184 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001185 _mov(Dest, esp);
1186}
1187
1188void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1189 Variable *Dest = Inst->getDest();
1190 Operand *Src0 = legalize(Inst->getSrc(0));
1191 Operand *Src1 = legalize(Inst->getSrc(1));
1192 if (Dest->getType() == IceType_i64) {
1193 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1194 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1195 Operand *Src0Lo = loOperand(Src0);
1196 Operand *Src0Hi = hiOperand(Src0);
1197 Operand *Src1Lo = loOperand(Src1);
1198 Operand *Src1Hi = hiOperand(Src1);
1199 Variable *T_Lo = NULL, *T_Hi = NULL;
1200 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001201 case InstArithmetic::_num:
1202 llvm_unreachable("Unknown arithmetic operator");
1203 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001204 case InstArithmetic::Add:
1205 _mov(T_Lo, Src0Lo);
1206 _add(T_Lo, Src1Lo);
1207 _mov(DestLo, T_Lo);
1208 _mov(T_Hi, Src0Hi);
1209 _adc(T_Hi, Src1Hi);
1210 _mov(DestHi, T_Hi);
1211 break;
1212 case InstArithmetic::And:
1213 _mov(T_Lo, Src0Lo);
1214 _and(T_Lo, Src1Lo);
1215 _mov(DestLo, T_Lo);
1216 _mov(T_Hi, Src0Hi);
1217 _and(T_Hi, Src1Hi);
1218 _mov(DestHi, T_Hi);
1219 break;
1220 case InstArithmetic::Or:
1221 _mov(T_Lo, Src0Lo);
1222 _or(T_Lo, Src1Lo);
1223 _mov(DestLo, T_Lo);
1224 _mov(T_Hi, Src0Hi);
1225 _or(T_Hi, Src1Hi);
1226 _mov(DestHi, T_Hi);
1227 break;
1228 case InstArithmetic::Xor:
1229 _mov(T_Lo, Src0Lo);
1230 _xor(T_Lo, Src1Lo);
1231 _mov(DestLo, T_Lo);
1232 _mov(T_Hi, Src0Hi);
1233 _xor(T_Hi, Src1Hi);
1234 _mov(DestHi, T_Hi);
1235 break;
1236 case InstArithmetic::Sub:
1237 _mov(T_Lo, Src0Lo);
1238 _sub(T_Lo, Src1Lo);
1239 _mov(DestLo, T_Lo);
1240 _mov(T_Hi, Src0Hi);
1241 _sbb(T_Hi, Src1Hi);
1242 _mov(DestHi, T_Hi);
1243 break;
1244 case InstArithmetic::Mul: {
1245 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbd385e42014-09-18 18:18:10 -07001246 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1247 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001248 // gcc does the following:
1249 // a=b*c ==>
1250 // t1 = b.hi; t1 *=(imul) c.lo
1251 // t2 = c.hi; t2 *=(imul) b.lo
1252 // t3:eax = b.lo
1253 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1254 // a.lo = t4.lo
1255 // t4.hi += t1
1256 // t4.hi += t2
1257 // a.hi = t4.hi
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07001258 // The mul instruction cannot take an immediate operand.
1259 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001260 _mov(T_1, Src0Hi);
1261 _imul(T_1, Src1Lo);
1262 _mov(T_2, Src1Hi);
1263 _imul(T_2, Src0Lo);
Jan Voungbd385e42014-09-18 18:18:10 -07001264 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001265 _mul(T_4Lo, T_3, Src1Lo);
1266 // The mul instruction produces two dest variables, edx:eax. We
1267 // create a fake definition of edx to account for this.
1268 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1269 _mov(DestLo, T_4Lo);
1270 _add(T_4Hi, T_1);
1271 _add(T_4Hi, T_2);
1272 _mov(DestHi, T_4Hi);
1273 } break;
1274 case InstArithmetic::Shl: {
1275 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1276 // gcc does the following:
1277 // a=b<<c ==>
1278 // t1:ecx = c.lo & 0xff
1279 // t2 = b.lo
1280 // t3 = b.hi
1281 // t3 = shld t3, t2, t1
1282 // t2 = shl t2, t1
1283 // test t1, 0x20
1284 // je L1
1285 // use(t3)
1286 // t3 = t2
1287 // t2 = 0
1288 // L1:
1289 // a.lo = t2
1290 // a.hi = t3
1291 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07001292 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001293 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001294 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001295 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001296 _mov(T_2, Src0Lo);
1297 _mov(T_3, Src0Hi);
1298 _shld(T_3, T_2, T_1);
1299 _shl(T_2, T_1);
1300 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001301 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001302 // T_2 and T_3 are being assigned again because of the
1303 // intra-block control flow, so we need the _mov_nonkillable
1304 // variant to avoid liveness problems.
1305 _mov_nonkillable(T_3, T_2);
1306 _mov_nonkillable(T_2, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001307 Context.insert(Label);
1308 _mov(DestLo, T_2);
1309 _mov(DestHi, T_3);
1310 } break;
1311 case InstArithmetic::Lshr: {
1312 // a=b>>c (unsigned) ==>
1313 // t1:ecx = c.lo & 0xff
1314 // t2 = b.lo
1315 // t3 = b.hi
1316 // t2 = shrd t2, t3, t1
1317 // t3 = shr t3, t1
1318 // test t1, 0x20
1319 // je L1
1320 // use(t2)
1321 // t2 = t3
1322 // t3 = 0
1323 // L1:
1324 // a.lo = t2
1325 // a.hi = t3
1326 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07001327 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001328 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001329 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001330 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001331 _mov(T_2, Src0Lo);
1332 _mov(T_3, Src0Hi);
1333 _shrd(T_2, T_3, T_1);
1334 _shr(T_3, T_1);
1335 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001336 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001337 // T_2 and T_3 are being assigned again because of the
1338 // intra-block control flow, so we need the _mov_nonkillable
1339 // variant to avoid liveness problems.
1340 _mov_nonkillable(T_2, T_3);
1341 _mov_nonkillable(T_3, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001342 Context.insert(Label);
1343 _mov(DestLo, T_2);
1344 _mov(DestHi, T_3);
1345 } break;
1346 case InstArithmetic::Ashr: {
1347 // a=b>>c (signed) ==>
1348 // t1:ecx = c.lo & 0xff
1349 // t2 = b.lo
1350 // t3 = b.hi
1351 // t2 = shrd t2, t3, t1
1352 // t3 = sar t3, t1
1353 // test t1, 0x20
1354 // je L1
1355 // use(t2)
1356 // t2 = t3
1357 // t3 = sar t3, 0x1f
1358 // L1:
1359 // a.lo = t2
1360 // a.hi = t3
1361 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07001362 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1363 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001364 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001365 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001366 _mov(T_2, Src0Lo);
1367 _mov(T_3, Src0Hi);
1368 _shrd(T_2, T_3, T_1);
1369 _sar(T_3, T_1);
1370 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001371 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001372 // T_2 and T_3 are being assigned again because of the
1373 // intra-block control flow, so T_2 needs the _mov_nonkillable
1374 // variant to avoid liveness problems. T_3 doesn't need special
1375 // treatment because it is reassigned via _sar instead of _mov.
1376 _mov_nonkillable(T_2, T_3);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001377 _sar(T_3, SignExtend);
1378 Context.insert(Label);
1379 _mov(DestLo, T_2);
1380 _mov(DestHi, T_3);
1381 } break;
1382 case InstArithmetic::Udiv: {
1383 const SizeT MaxSrcs = 2;
1384 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
1385 Call->addArg(Inst->getSrc(0));
1386 Call->addArg(Inst->getSrc(1));
1387 lowerCall(Call);
1388 } break;
1389 case InstArithmetic::Sdiv: {
1390 const SizeT MaxSrcs = 2;
1391 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
1392 Call->addArg(Inst->getSrc(0));
1393 Call->addArg(Inst->getSrc(1));
1394 lowerCall(Call);
1395 } break;
1396 case InstArithmetic::Urem: {
1397 const SizeT MaxSrcs = 2;
1398 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
1399 Call->addArg(Inst->getSrc(0));
1400 Call->addArg(Inst->getSrc(1));
1401 lowerCall(Call);
1402 } break;
1403 case InstArithmetic::Srem: {
1404 const SizeT MaxSrcs = 2;
1405 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
1406 Call->addArg(Inst->getSrc(0));
1407 Call->addArg(Inst->getSrc(1));
1408 lowerCall(Call);
1409 } break;
1410 case InstArithmetic::Fadd:
1411 case InstArithmetic::Fsub:
1412 case InstArithmetic::Fmul:
1413 case InstArithmetic::Fdiv:
1414 case InstArithmetic::Frem:
1415 llvm_unreachable("FP instruction with i64 type");
1416 break;
1417 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001418 } else if (isVectorType(Dest->getType())) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001419 // TODO: Trap on integer divide and integer modulo by zero.
1420 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
Matt Wala8d1072e2014-07-11 15:43:51 -07001421 switch (Inst->getOp()) {
1422 case InstArithmetic::_num:
1423 llvm_unreachable("Unknown arithmetic operator");
1424 break;
Matt Wala7fa22d82014-07-17 12:41:31 -07001425 case InstArithmetic::Add: {
1426 Variable *T = makeReg(Dest->getType());
1427 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001428 _padd(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001429 _movp(Dest, T);
1430 } break;
1431 case InstArithmetic::And: {
1432 Variable *T = makeReg(Dest->getType());
1433 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001434 _pand(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001435 _movp(Dest, T);
1436 } break;
1437 case InstArithmetic::Or: {
1438 Variable *T = makeReg(Dest->getType());
1439 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001440 _por(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001441 _movp(Dest, T);
1442 } break;
1443 case InstArithmetic::Xor: {
1444 Variable *T = makeReg(Dest->getType());
1445 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001446 _pxor(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001447 _movp(Dest, T);
1448 } break;
1449 case InstArithmetic::Sub: {
1450 Variable *T = makeReg(Dest->getType());
1451 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001452 _psub(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001453 _movp(Dest, T);
1454 } break;
1455 case InstArithmetic::Mul: {
Matt Wala0a450512014-07-30 12:44:39 -07001456 bool TypesAreValidForPmull =
1457 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1458 bool InstructionSetIsValidForPmull =
1459 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1460 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1461 Variable *T = makeReg(Dest->getType());
1462 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001463 _pmull(T, Src1);
Matt Wala0a450512014-07-30 12:44:39 -07001464 _movp(Dest, T);
1465 } else if (Dest->getType() == IceType_v4i32) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001466 // Lowering sequence:
1467 // Note: The mask arguments have index 0 on the left.
1468 //
1469 // movups T1, Src0
1470 // pshufd T2, Src0, {1,0,3,0}
1471 // pshufd T3, Src1, {1,0,3,0}
1472 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1473 // pmuludq T1, Src1
1474 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1475 // pmuludq T2, T3
1476 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1477 // shufps T1, T2, {0,2,0,2}
1478 // pshufd T4, T1, {0,2,1,3}
1479 // movups Dest, T4
Matt Wala7fa22d82014-07-17 12:41:31 -07001480
1481 // Mask that directs pshufd to create a vector with entries
1482 // Src[1, 0, 3, 0]
1483 const unsigned Constant1030 = 0x31;
Jan Voungbc004632014-09-16 15:09:10 -07001484 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030);
Matt Wala7fa22d82014-07-17 12:41:31 -07001485 // Mask that directs shufps to create a vector with entries
1486 // Dest[0, 2], Src[0, 2]
1487 const unsigned Mask0202 = 0x88;
1488 // Mask that directs pshufd to create a vector with entries
1489 // Src[0, 2, 1, 3]
1490 const unsigned Mask0213 = 0xd8;
1491 Variable *T1 = makeReg(IceType_v4i32);
1492 Variable *T2 = makeReg(IceType_v4i32);
1493 Variable *T3 = makeReg(IceType_v4i32);
1494 Variable *T4 = makeReg(IceType_v4i32);
1495 _movp(T1, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001496 _pshufd(T2, Src0, Mask1030);
1497 _pshufd(T3, Src1, Mask1030);
1498 _pmuludq(T1, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001499 _pmuludq(T2, T3);
Jan Voungbc004632014-09-16 15:09:10 -07001500 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202));
1501 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213));
Matt Wala7fa22d82014-07-17 12:41:31 -07001502 _movp(Dest, T4);
Matt Wala7fa22d82014-07-17 12:41:31 -07001503 } else {
1504 assert(Dest->getType() == IceType_v16i8);
Matt Walaafeaee42014-08-07 13:47:30 -07001505 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001506 }
1507 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001508 case InstArithmetic::Shl:
1509 case InstArithmetic::Lshr:
1510 case InstArithmetic::Ashr:
1511 case InstArithmetic::Udiv:
1512 case InstArithmetic::Urem:
1513 case InstArithmetic::Sdiv:
1514 case InstArithmetic::Srem:
1515 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1516 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001517 case InstArithmetic::Fadd: {
1518 Variable *T = makeReg(Dest->getType());
1519 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001520 _addps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001521 _movp(Dest, T);
1522 } break;
1523 case InstArithmetic::Fsub: {
1524 Variable *T = makeReg(Dest->getType());
1525 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001526 _subps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001527 _movp(Dest, T);
1528 } break;
1529 case InstArithmetic::Fmul: {
1530 Variable *T = makeReg(Dest->getType());
1531 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001532 _mulps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001533 _movp(Dest, T);
1534 } break;
1535 case InstArithmetic::Fdiv: {
1536 Variable *T = makeReg(Dest->getType());
1537 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001538 _divps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001539 _movp(Dest, T);
1540 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001541 case InstArithmetic::Frem:
1542 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1543 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001544 }
1545 } else { // Dest->getType() is non-i64 scalar
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001546 Variable *T_edx = NULL;
1547 Variable *T = NULL;
1548 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001549 case InstArithmetic::_num:
1550 llvm_unreachable("Unknown arithmetic operator");
1551 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001552 case InstArithmetic::Add:
1553 _mov(T, Src0);
1554 _add(T, Src1);
1555 _mov(Dest, T);
1556 break;
1557 case InstArithmetic::And:
1558 _mov(T, Src0);
1559 _and(T, Src1);
1560 _mov(Dest, T);
1561 break;
1562 case InstArithmetic::Or:
1563 _mov(T, Src0);
1564 _or(T, Src1);
1565 _mov(Dest, T);
1566 break;
1567 case InstArithmetic::Xor:
1568 _mov(T, Src0);
1569 _xor(T, Src1);
1570 _mov(Dest, T);
1571 break;
1572 case InstArithmetic::Sub:
1573 _mov(T, Src0);
1574 _sub(T, Src1);
1575 _mov(Dest, T);
1576 break;
1577 case InstArithmetic::Mul:
1578 // TODO: Optimize for llvm::isa<Constant>(Src1)
1579 // TODO: Strength-reduce multiplications by a constant,
1580 // particularly -1 and powers of 2. Advanced: use lea to
1581 // multiply by 3, 5, 9.
1582 //
1583 // The 8-bit version of imul only allows the form "imul r/m8"
1584 // where T must be in eax.
Jan Voung0ac50dc2014-09-30 08:36:06 -07001585 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001586 _mov(T, Src0, RegX8632::Reg_eax);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001587 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1588 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001589 _mov(T, Src0);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001590 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001591 _imul(T, Src1);
1592 _mov(Dest, T);
1593 break;
1594 case InstArithmetic::Shl:
1595 _mov(T, Src0);
1596 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001597 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001598 _shl(T, Src1);
1599 _mov(Dest, T);
1600 break;
1601 case InstArithmetic::Lshr:
1602 _mov(T, Src0);
1603 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001604 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001605 _shr(T, Src1);
1606 _mov(Dest, T);
1607 break;
1608 case InstArithmetic::Ashr:
1609 _mov(T, Src0);
1610 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001611 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001612 _sar(T, Src1);
1613 _mov(Dest, T);
1614 break;
1615 case InstArithmetic::Udiv:
Jan Voung70d68832014-06-17 10:02:37 -07001616 // div and idiv are the few arithmetic operators that do not allow
1617 // immediates as the operand.
1618 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001619 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001620 Variable *T_ah = NULL;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001621 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001622 _mov(T, Src0, RegX8632::Reg_eax);
1623 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001624 _div(T, Src1, T_ah);
1625 _mov(Dest, T);
1626 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001627 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001628 _mov(T, Src0, RegX8632::Reg_eax);
1629 _mov(T_edx, Zero, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001630 _div(T, Src1, T_edx);
1631 _mov(Dest, T);
1632 }
1633 break;
1634 case InstArithmetic::Sdiv:
Jan Voung70d68832014-06-17 10:02:37 -07001635 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001636 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001637 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001638 _cbwdq(T, T);
1639 _idiv(T, Src1, T);
1640 _mov(Dest, T);
1641 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001642 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1643 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001644 _cbwdq(T_edx, T);
1645 _idiv(T, Src1, T_edx);
1646 _mov(Dest, T);
1647 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001648 break;
1649 case InstArithmetic::Urem:
Jan Voung70d68832014-06-17 10:02:37 -07001650 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001651 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001652 Variable *T_ah = NULL;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001653 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001654 _mov(T, Src0, RegX8632::Reg_eax);
1655 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001656 _div(T_ah, Src1, T);
1657 _mov(Dest, T_ah);
1658 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001659 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001660 _mov(T_edx, Zero, RegX8632::Reg_edx);
1661 _mov(T, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001662 _div(T_edx, Src1, T);
1663 _mov(Dest, T_edx);
1664 }
1665 break;
1666 case InstArithmetic::Srem:
Jan Voung70d68832014-06-17 10:02:37 -07001667 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001668 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001669 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1670 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001671 _cbwdq(T, T);
1672 Context.insert(InstFakeDef::create(Func, T_ah));
1673 _idiv(T_ah, Src1, T);
1674 _mov(Dest, T_ah);
1675 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001676 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1677 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001678 _cbwdq(T_edx, T);
1679 _idiv(T_edx, Src1, T);
1680 _mov(Dest, T_edx);
1681 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001682 break;
1683 case InstArithmetic::Fadd:
1684 _mov(T, Src0);
1685 _addss(T, Src1);
1686 _mov(Dest, T);
1687 break;
1688 case InstArithmetic::Fsub:
1689 _mov(T, Src0);
1690 _subss(T, Src1);
1691 _mov(Dest, T);
1692 break;
1693 case InstArithmetic::Fmul:
1694 _mov(T, Src0);
1695 _mulss(T, Src1);
1696 _mov(Dest, T);
1697 break;
1698 case InstArithmetic::Fdiv:
1699 _mov(T, Src0);
1700 _divss(T, Src1);
1701 _mov(Dest, T);
1702 break;
1703 case InstArithmetic::Frem: {
1704 const SizeT MaxSrcs = 2;
1705 Type Ty = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07001706 InstCall *Call = makeHelperCall(
1707 isFloat32Asserting32Or64(Ty) ? "fmodf" : "fmod", Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001708 Call->addArg(Src0);
1709 Call->addArg(Src1);
1710 return lowerCall(Call);
1711 } break;
1712 }
1713 }
1714}
1715
1716void TargetX8632::lowerAssign(const InstAssign *Inst) {
1717 Variable *Dest = Inst->getDest();
1718 Operand *Src0 = Inst->getSrc(0);
1719 assert(Dest->getType() == Src0->getType());
1720 if (Dest->getType() == IceType_i64) {
1721 Src0 = legalize(Src0);
1722 Operand *Src0Lo = loOperand(Src0);
1723 Operand *Src0Hi = hiOperand(Src0);
1724 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1725 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1726 Variable *T_Lo = NULL, *T_Hi = NULL;
1727 _mov(T_Lo, Src0Lo);
1728 _mov(DestLo, T_Lo);
1729 _mov(T_Hi, Src0Hi);
1730 _mov(DestHi, T_Hi);
1731 } else {
Jim Stichnoth336f6c42014-10-30 15:01:31 -07001732 // If Dest is in memory, then RI is either a physical register or
1733 // an immediate, otherwise RI can be anything.
1734 Operand *RI =
1735 legalize(Src0, Dest->hasReg() ? Legal_All : Legal_Reg | Legal_Imm);
Matt Wala45a06232014-07-09 16:33:22 -07001736 if (isVectorType(Dest->getType()))
1737 _movp(Dest, RI);
1738 else
1739 _mov(Dest, RI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001740 }
1741}
1742
1743void TargetX8632::lowerBr(const InstBr *Inst) {
1744 if (Inst->isUnconditional()) {
1745 _br(Inst->getTargetUnconditional());
1746 } else {
Jim Stichnoth206833c2014-08-07 10:58:05 -07001747 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001748 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001749 _cmp(Src0, Zero);
Jan Voungbd385e42014-09-18 18:18:10 -07001750 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001751 }
1752}
1753
// Lowers a call instruction: classifies arguments into XMM registers
// vs. stack slots, adjusts esp around the call, emits the call plus
// the liveness-related pseudo instructions (FakeDef/FakeUse/FakeKill),
// and moves the return value into the call's Dest variable.
void TargetX8632::lowerCall(const InstCall *Instr) {
  // x86-32 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers xmm0 - xmm3.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 4 bytes.
  //
  // This intends to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
  NeedsStackAlignment = true;

  OperandList XmmArgs;
  OperandList StackArgs, StackArgLocations;
  // Running size of the stack-argument area, in bytes; doubles as the
  // offset (from post-adjustment esp) of the next stack argument.
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      // Vector stack arguments start at a 16-byte-aligned offset.
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
      }
      // Record the esp-relative location the argument will be stored to.
      Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
      Constant *Loc =
          Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
      StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = NULL;
  Variable *ReturnRegHi = NULL;
  if (Dest) {
    // Pick the fixed return register(s) dictated by the return type:
    // eax for 32-bit-or-narrower integers, edx:eax for i64, xmm0 for
    // vectors; scalar FP results come back in st(0) (handled below).
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  // The call only explicitly defines eax (ReturnReg); model the edx
  // half of an i64 return with a FakeDef.
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      // i64 result: copy edx:eax into the split halves of Dest.
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (isScalarFloatingType(Dest->getType())) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used,
    // because st(0) still needs to be popped.
    Context.insert(InstFakeUse::create(Func, Dest));
  }
}
1934
// Lowers a high-level cast instruction (sext/zext/trunc, the FP
// conversions, and bitcast) to x86-32 instructions.  The general
// pattern is: a = cast(b) ==> t = cast(b); a = t; (link t->b, link
// a->t, no overlap).  i64 operations are split into lo/hi 32-bit
// halves, vector casts use SSE2 mask tricks, and conversions SSE2
// cannot express directly (e.g. f64 -> u64) are routed through
// runtime helper calls via makeHelperCall().
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand.  If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that
    // the optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // There is no byte-granular vector shift, so sign-extend v16i8
        // by comparison instead:
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // Shift the (single) significant bit up to the sign position and
        // arithmetic-shift it back down, replicating it across the lane:
        // width = width(elty) - 1; dest = (src << width) >> width
        SizeT ShiftAmount =
            X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
        Constant *ShiftConstant =
            Ctx->getConstantInt32(IceType_i8, ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        // Already 32 bits wide; a plain move suffices for the low half.
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        // movsx doesn't take an i1 operand, so widen with movzx and then
        // replicate the bit via shl/sar.
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // Scalar sign-extension from i1: replicate the single bit by
      // shifting it to the top of the destination and back.
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
      size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
      Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1);
      Variable *T = makeReg(Dest->getType());
      if (typeWidthInBytes(Dest->getType()) <=
          typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx.  (It doesn't matter
        // which one, since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // Common scalar case: t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      // Vector zext: clear all but the low bit of each element.
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      if (Src0RM->getType() == IceType_i1) {
        // An i1 may be stored with garbage in the upper bits; mask down
        // to the single significant bit.
        Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
        _and(Tmp, One);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
      Type DestTy = Dest->getType();
      Variable *T;
      if (DestTy == IceType_i8) {
        T = makeReg(DestTy);
        _mov(T, Src0RM);
      } else {
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
        T = makeReg(IceType_i32);
        _movzx(T, Src0RM);
      }
      _and(T, One);
      _mov(Dest, T);
    } else {
      // Common scalar case: t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // Vector truncation (to a boolean vector) keeps only the low bit
      // of each element:
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = Inst->getSrc(0);
      if (Src0->getType() == IceType_i64)
        // Truncation from i64 only needs the low 32-bit half.
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = NULL;
      _mov(T, Src0RM);
      if (Dest->getType() == IceType_i1)
        _and(T, Ctx->getConstantInt32(IceType_i1, 1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // Both float<->double directions are a single cvtss2sd/cvtsd2ss,
    // selected by the operand types (Float2float).
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM, InstX8632Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          isFloat32Asserting32Or64(SrcType) ? "cvtftosi64" : "cvtdtosi64", Dest,
          MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // cvttss2si/cvttsd2si produces an i32; narrow afterwards if the
      // destination is a smaller integer type.
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      IceString DstSubstring = (isInt32Asserting32Or64(DestType) ? "32" : "64");
      IceString SrcSubstring = (isFloat32Asserting32Or64(SrcType) ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Destination is i16/i8/i1: the value fits in the positive range
      // of i32, so the signed truncating convert is sufficient.
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          isFloat32Asserting32Or64(DestType) ? "cvtsi64tof" : "cvtsi64tod",
          Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               Src0->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring =
          (isInt32Asserting32Or64(Src0->getType()) ? "32" : "64");
      IceString DstSubstring = (isFloat32Asserting32Or64(DestType) ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      // TODO: Call the correct compiler-rt helper function.
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      // Source is i16/i8/i1: zero-extending into i32 makes the value
      // non-negative, so the signed int-to-float convert is exact.
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      // Same-type bitcast degenerates to a plain assignment.
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      // Bit-vector <-> scalar bitcasts go through runtime helpers.
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      // i32 <-> f32 bitcast is done by round-tripping the value through
      // a zero-weight stack slot (Spill), which changes the register
      // class without changing the bits.
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      (void)DestType;
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
      SpillVar->setLinkedTo(llvm::dyn_cast<Variable>(Src0RM));
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);
      _movq(Spill, Src0RM);

      // Read the spilled f64 back as two 32-bit halves via VariableSplit.
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      Src0 = legalize(Src0);
      assert(Src0->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   FakeDef(s.f64)
      //   lo(s.f64) = t_lo.i32
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0));
      // Technically, the Spill is defined after the _store happens, but
      // SpillLo is considered a "use" of Spill so define Spill before it
      // is used.
      Context.insert(InstFakeDef::create(Func, Spill));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0));
      _store(T_Hi, SpillHi);
      _movq(Dest, Spill);
    } break;
    case IceType_v8i1: {
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      // 128-bit vector <-> vector bitcasts are just a register move.
      _movp(Dest, legalizeToVar(Src0));
    } break;
    }
    break;
  }
  }
}
2384
// Lowers extractelement: pulls one element out of a vector operand
// into a scalar destination.  The index must be a compile-time
// constant (a PNaCl IR requirement, enforced by the assert below).
// Strategy depends on the vector type and the available instruction
// set: pextr{b,w,d} when usable, pshufd+movd/movss for 4-element
// vectors, and a spill-to-stack-slot memory load as the fallback for
// 16 x i8 element vectors.
void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  // The "in-vector" element type may be wider than ElementTy (e.g. an
  // i1 element is stored as a wider lane); the extraction happens at
  // the in-vector width and is truncated at the end if needed.
  Type InVectorElementTy = getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  // pextrw (for v8i16/v8i1) exists in SSE2; pextrb/pextrd need SSE4.1.
  bool CanUsePextr =
      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Use pextrb, pextrw, or pextrd.
    Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
    Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = NULL;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToVar(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination.  _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    // Zero weight keeps the register allocator from assigning the slot
    // a register, forcing it to live on the stack.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}
2460
// Lowers fcmp.  Vector comparisons are lowered to cmpps with a
// predicate taken from TableFcmp (with One/Ueq needing two cmpps plus
// a pand/por combine); scalar comparisons use ucomiss plus up to two
// conditional branches selecting between a default and non-default
// result, also driven by TableFcmp.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    // Some conditions are only expressible with the operands swapped
    // (cmpps has no "greater" predicates); TableFcmp records which.
    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = NULL;

    if (Condition == InstFcmp::True) {
      // "Always true" is a vector of all-ones.
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      // "Always false" is a vector of all-zeros.
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

      switch (Condition) {
      default: {
        // Single cmpps with the table-supplied predicate.
        CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
        assert(Predicate != CondX86::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check both equal or unordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    // A following sext of the mask result would be redundant; drop it.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  // C1 == Br_None means the result is constant (True/False condition)
  // and no compare/branch sequence is needed at all.
  bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // _mov_nonkillable keeps the earlier def of Dest live across the
    // branch (a plain _mov would end its live range prematurely).
    Constant *NonDefault =
        Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}
2565
// Lowers an icmp instruction.  Three cases are distinguished below:
//  - vector compares, lowered to SSE2 pcmpeq/pcmpgt sequences;
//  - i64 scalar compares, lowered as a pair of 32-bit compares with
//    conditional branches driven by TableIcmp64;
//  - 32-bit-or-narrower scalar compares, lowered to cmp plus a
//    branch-over-constant-store sequence, with an optional fusion of the
//    compare into an immediately following conditional branch.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      // Sign-extend both i1-vector operands into full-width element
      // vectors so the SSE2 compare instructions can be used.
      Variable *NewSrc0 = Func->makeVariable(NewTy);
      Variable *NewSrc1 = Func->makeVariable(NewTy);
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations.  Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    // SSE2 supplies only "equal" and "greater-than" vector compares;
    // the remaining conditions are synthesized by swapping operands
    // and/or negating the result with a pxor against all-ones.
    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
    } break;
    case InstIcmp::Ne: {
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1RM > Src0RM)
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0RM > Src1RM)
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch.  This
  // is possible when the compare dest and the branch source operands are the
  // same, and are their only uses.  TODO: implement this optimization for i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      NextBr->setDeleted();
      Operand *Src0RM =
          legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
      _cmp(Src0RM, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // eq/ne: both halves must match (or any half must differ); branch to
      // Label as soon as a half differs, then overwrite Dest on fallthrough.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(Src0LoRM, Src1LoRI);
      _br(CondX86::Br_ne, Label);
      _cmp(Src0HiRM, Src1HiRI);
      _br(CondX86::Br_ne, Label);
      // _mov_nonkillable keeps the first mov's definition of Dest live
      // across the conditional branches (Dest has two reaching defs).
      _mov_nonkillable(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // Ordered compare: decide on the high halves first (C1/C2), and
      // only compare the low halves (C3) when the high halves are equal.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(Src0HiRM, Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(Src0LoRM, Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      _mov_nonkillable(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0RM =
      legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0RM, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  _mov_nonkillable(Dest, Zero);
  Context.insert(Label);
}
2745
// Lowers an insertelement instruction.  The element index is a
// compile-time constant (PNaCl ABI requirement); it selects among three
// strategies: a single pinsr/insertps when the hardware supports it, a
// shufps/movss sequence for 4-element vectors on pre-SSE4.1, or a
// spill-store-reload through a stack slot for 16-element vectors.
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    // (pinsrw is SSE2, so the v8i16/v8i1 case needs no SSE4.1 check.)
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      // insertps encodes the destination element index in imm8 bits 4-5.
      _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = NULL;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToVar(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      // Index 0 is the low scalar slot, so a plain movss suffices.
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords.  The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks.  In all cases below,
    // Element[0] is being inserted into SourceVectOperand.  Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    // Mask entries are indexed by (Index - 1); they encode the shuffle
    // selectors described above as shufps imm8 values.
    const unsigned char Mask1[3] = { 0, 192, 128 };
    const unsigned char Mask2[3] = { 227, 196, 52 };

    Constant *Mask1Constant =
        Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]);
    Constant *Mask2Constant =
        Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero); // force a stack slot, not a register
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToVar(ElementToInsertNotLegalized), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}
2870
// Lowers a call to a recognized intrinsic, dispatching on the intrinsic
// ID.  Atomic intrinsics validate their compile-time memory-order
// arguments and report violations via Func->setError().  Several
// intrinsics (memcpy/memmove/memset, setjmp/longjmp, ctpop, the
// non-sandboxed nacl.read.tp) are lowered to runtime helper calls.
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    // Args: (ptr, expected, desired, success-order, failure-order).
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
      return;
    }
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {
      Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = Instr->getArg(0);
    Operand *Expected = Instr->getArg(1);
    Operand *Desired = Instr->getArg(2);
    // Prefer the fused cmpxchg+cmp+br form when the following
    // instructions allow it.
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent and load/store from being moved
    // across the fence (both atomic and non-atomic).  The InstX8632Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant
    // byte size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b intruction, which
        // can make 16-byte operations lock free (when used with the LOCK
        // prefix).  However, that's not supported in 32-bit mode, so just
        // return 0 even for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(IceType_i32, 1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal loads are atomic.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (Dest->getType() == IceType_i64) {
      // Follow what GCC does and use a movq instead of what lowerLoad()
      // normally does (split the load into two).
      // Thus, this skips load/arithmetic op folding.  Load/arithmetic folding
      // can't happen anyway, since this is x86-32 and integer arithmetic only
      // happens on 32-bit quantities.
      Variable *T = makeReg(IceType_f64);
      OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
      _movq(T, Addr);
      // Then cast the bits back out of the XMM register to the i64 Dest.
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
      lowerCast(Cast);
      // Make sure that the atomic load isn't elided when unused.
      Context.insert(InstFakeUse::create(Func, Dest->getLo()));
      Context.insert(InstFakeUse::create(Func, Dest->getHi()));
      return;
    }
    InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
    // insert the FakeUse on the last-inserted instruction's dest.
    Context.insert(
        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
    return;
  }
  case Intrinsics::AtomicRMW:
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    // Arg 0 is the RMW operation code; args 1 and 2 are the pointer and
    // the operand value.
    lowerAtomicRMW(Instr->getDest(),
                   static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
                       Instr->getArg(0))->getValue()),
                   Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal stores are atomic.
    // Add a fence after the store to make it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does
      // (split the store into two), following what GCC does.
      // Cast the bits from int -> to an xmm register first.
      Variable *T = makeReg(IceType_f64);
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    InstStore *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the
    // argument must be a register.  Use rotate left for 16-bit bswap.
    if (Val->getType() == IceType_i64) {
      // Byte-swap each half, then swap the halves into the destination.
      Variable *T_Lo = legalizeToVar(loOperand(Val));
      Variable *T_Hi = legalizeToVar(hiOperand(Val));
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if (Val->getType() == IceType_i32) {
      Variable *T = legalizeToVar(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
      Val = legalize(Val);
      // rol by 8 swaps the two bytes of a 16-bit value.
      Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8);
      Variable *T = NULL;
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // Population count is lowered to a libgcc helper call, chosen by
    // operand width (32- vs 64-bit).
    InstCall *Call =
        makeHelperCall(isInt32Asserting32Or64(Val->getType()) ? "__popcountsi2"
                                                              : "__popcountdi2",
                       Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode).  Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR.  If the user does that in the IR,
    // then this zero'ing instruction is dead and gets optimized out.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = NULL;
    if (Val->getType() == IceType_i64) {
      // For ctlz the high half is scanned first, so it is FirstVal.
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = NULL;
    if (Val->getType() == IceType_i64) {
      // For cttz the low half is scanned first; note the hi/lo operands
      // are passed in the opposite order from the Ctlz case.
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall("longjmp", NULL, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall("memcpy", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall("memmove", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because the PNaCl ABI requires arguments to be at least 32 bits
    // wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall("memset", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().UseSandboxing) {
      // Under sandboxing the thread pointer lives at %gs:0.
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Operand *Src = OperandX8632Mem::create(
          Func, IceType_i32, NULL, Zero, NULL, 0, OperandX8632Mem::SegReg_GS);
      Variable *Dest = Instr->getDest();
      Variable *T = NULL;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // Stacksave simply returns the current value of esp.
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    // _mov_nonkillable: esp is not a fresh definition; other defs of esp
    // must remain live.
    _mov_nonkillable(esp, Instr->getArg(0));
    return;
  }
  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  // NOTE(review): no default case above — presumably the switch covers
  // every Intrinsics::IntrinsicID value; confirm against IceIntrinsics.h.
  return;
}
3185
Jan Vounga3a01a22014-07-14 10:32:41 -07003186void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
3187 Operand *Expected, Operand *Desired) {
3188 if (Expected->getType() == IceType_i64) {
3189 // Reserve the pre-colored registers first, before adding any more
3190 // infinite-weight variables from FormMemoryOperand's legalization.
Jan Voungbd385e42014-09-18 18:18:10 -07003191 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3192 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3193 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3194 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
Jan Vounga3a01a22014-07-14 10:32:41 -07003195 _mov(T_eax, loOperand(Expected));
3196 _mov(T_edx, hiOperand(Expected));
3197 _mov(T_ebx, loOperand(Desired));
3198 _mov(T_ecx, hiOperand(Desired));
3199 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3200 const bool Locked = true;
3201 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3202 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3203 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3204 _mov(DestLo, T_eax);
3205 _mov(DestHi, T_edx);
3206 return;
3207 }
Jan Voungbd385e42014-09-18 18:18:10 -07003208 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
Jan Vounga3a01a22014-07-14 10:32:41 -07003209 _mov(T_eax, Expected);
3210 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3211 Variable *DesiredReg = legalizeToVar(Desired);
3212 const bool Locked = true;
3213 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3214 _mov(DestPrev, T_eax);
3215}
3216
// Tries to fuse an atomic cmpxchg with an immediately-following
// "icmp eq" against Expected and a conditional branch on that compare,
// so the branch can use the flags set by the lock'ed cmpxchg directly.
// Returns true if the pattern matched and the fused code (cmpxchg,
// any intervening phi stores, then a flags branch) was emitted, with
// the old compare and branch deleted; returns false without emitting
// anything otherwise.
bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
                                           Operand *Expected,
                                           Operand *Desired) {
  // Skip the optimization at -Om1 (minimal optimization level).
  if (Ctx->getOptLevel() == Opt_m1)
    return false;
  // Peek ahead a few instructions and see how Dest is used.
  // It's very common to have:
  //
  // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
  // [%y_phi = ...] // list of phi stores
  // %p = icmp eq i32 %x, %expected
  // br i1 %p, label %l1, label %l2
  //
  // which we can optimize into:
  //
  // %x = <cmpxchg code>
  // [%y_phi = ...] // list of phi stores
  // br eq, %l1, %l2
  InstList::iterator I = Context.getCur();
  // I is currently the InstIntrinsicCall. Peek past that.
  // This assumes that the atomic cmpxchg has not been lowered yet,
  // so that the instructions seen in the scan from "Cur" is simple.
  // (Each getNextInst(I) call below advances the scan by one
  // instruction; otherwise the phi-scan loop could not terminate.)
  assert(llvm::isa<InstIntrinsicCall>(*I));
  Inst *NextInst = Context.getNextInst(I);
  if (!NextInst)
    return false;
  // There might be phi assignments right before the compare+branch, since this
  // could be a backward branch for a loop. This placement of assignments is
  // determined by placePhiStores().
  std::vector<InstAssign *> PhiAssigns;
  while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
    // A phi store that clobbers Dest would invalidate the later
    // compare against Dest, so bail out.
    if (PhiAssign->getDest() == Dest)
      return false;
    PhiAssigns.push_back(PhiAssign);
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
  }
  if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
    // Require "icmp eq" comparing Dest with Expected (in either
    // operand order).
    if (!(NextCmp->getCondition() == InstIcmp::Eq &&
          ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
           (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
      return false;
    }
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
    if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
      // The branch must be conditional, on the compare's result, and
      // must be the last use of that result (so deleting the compare
      // is safe).
      if (!NextBr->isUnconditional() &&
          NextCmp->getDest() == NextBr->getCondition() &&
          NextBr->isLastUse(NextCmp->getDest())) {
        lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
        for (size_t i = 0; i < PhiAssigns.size(); ++i) {
          // Lower the phi assignments now, before the branch (same placement
          // as before).
          InstAssign *PhiAssign = PhiAssigns[i];
          PhiAssign->setDeleted();
          lowerAssign(PhiAssign);
          Context.advanceNext();
        }
        // Branch directly on the equality flag left by cmpxchg.
        _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
        // Skip over the old compare and branch, by deleting them.
        NextCmp->setDeleted();
        NextBr->setDeleted();
        Context.advanceNext();
        Context.advanceNext();
        return true;
      }
    }
  }
  return false;
}
3289
// Lowers an atomic read-modify-write intrinsic.  Operation is one of
// the Intrinsics::Atomic* RMW operation codes; Dest receives the value
// that was at Ptr before the operation.  32-bit add/sub/xchg get
// single-instruction lowerings (xadd/xchg); everything else — and all
// 64-bit operations — falls through to a cmpxchg loop via
// expandAtomicRMWAsCmpxchg, with Op_Lo/Op_Hi selecting the per-word
// arithmetic to apply inside the loop.
void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                 Operand *Ptr, Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = NULL;
  LowerBinOp Op_Hi = NULL;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      NeedsCmpxchg = true;
      // 64-bit add is add/adc across the two 32-bit halves.
      Op_Lo = &TargetX8632::_add;
      Op_Hi = &TargetX8632::_adc;
      break;
    }
    // 32-bit add: lock xadd leaves the old value in T.
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = NULL;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // 64-bit subtract is sub/sbb across the two 32-bit halves.
      Op_Lo = &TargetX8632::_sub;
      Op_Hi = &TargetX8632::_sbb;
      break;
    }
    // 32-bit subtract: negate the operand, then lock xadd.
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = NULL;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_or;
    Op_Hi = &TargetX8632::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_and;
    Op_Hi = &TargetX8632::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_xor;
    Op_Hi = &TargetX8632::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = NULL;
      Op_Hi = NULL;
      break;
    }
    // 32-bit exchange: plain xchg (implicitly locked on x86).
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    Variable *T = NULL;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
3373
// Expands an atomic RMW that has no single-instruction lowering into a
// lock-cmpxchg retry loop.  Op_Lo/Op_Hi are member-function pointers
// applying the operation to the low/high 32-bit words; both NULL means
// a plain exchange (the value is loaded into ebx/ecx once, before the
// loop).  Dest receives the pre-operation memory value.
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_hi>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are NULL, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    // Pre-color the cmpxchg8b's fixed registers.
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
    if (!IsXchg8b) {
      // Recompute the desired value (in ecx:ebx) on every retry.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(CondX86::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base is also reused in the loop.
    Context.insert(InstFakeUse::create(Func, Addr->getBase()));
    // cmpxchg8b left the pre-operation value in edx:eax.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 32-bit (and narrower) path: single-register cmpxchg loop.
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == NULL, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(CondX86::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base is also reused in the loop.
  Context.insert(InstFakeUse::create(Func, Addr->getBase()));
  _mov(Dest, T_eax);
}
3474
// Lowers count {trailing, leading} zeros intrinsics (cttz/ctlz).
//
// Cttz selects the bsf-based lowering; otherwise bsr is used.  Ty is
// the operand type (i32 or i64).  For i64, FirstVal is the 32-bit half
// used for the speculative first computation and SecondVal is the
// other half -- which half is which is presumably chosen by the caller
// to match the speculation described below (TODO confirm at call
// site).
//
// We could do constant folding here, but that should have
// been done by the front-end/middle-end optimizations.
void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
                                  Operand *FirstVal, Operand *SecondVal) {
  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
  // Then the instructions will handle the Val == 0 case much more simply
  // and won't require conversion from bit position to number of zeros.
  //
  // Otherwise:
  //   bsr IF_NOT_ZERO, Val
  //   mov T_DEST, 63
  //   cmovne T_DEST, IF_NOT_ZERO
  //   xor T_DEST, 31
  //   mov DEST, T_DEST
  //
  // NOTE: T_DEST must be a register because cmov requires its dest to be a
  // register. Also, bsf and bsr require their dest to be a register.
  //
  // The xor DEST, 31 converts a bit position to # of leading zeroes.
  // E.g., for 000... 00001100, bsr will say that the most significant bit
  // set is at position 3, while the number of leading zeros is 28. Xor is
  // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros
  // case).
  //
  // Similar for 64-bit, but start w/ speculating that the upper 32 bits
  // are all zero, and compute the result for that case (checking the lower
  // 32 bits). Then actually compute the result for the upper bits and
  // cmov in the result from the lower computation if the earlier speculation
  // was correct.
  //
  // Cttz, is similar, but uses bsf instead, and doesn't require the xor
  // bit position conversion, and the speculation is reversed.
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
  if (Cttz) {
    _bsf(T, FirstValRM);
  } else {
    _bsr(T, FirstValRM);
  }
  Variable *T_Dest = makeReg(IceType_i32);
  Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);
  Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);
  // Seed T_Dest with the all-zeros-input answer (32 for cttz; 63 here
  // for ctlz, becoming 32 after the xor-31 below), then cmov in the
  // bsf/bsr result if the input was nonzero.
  if (Cttz) {
    _mov(T_Dest, ThirtyTwo);
  } else {
    Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);
    _mov(T_Dest, SixtyThree);
  }
  _cmov(T_Dest, T, CondX86::Br_ne);
  if (!Cttz) {
    // Convert bsr's bit position into a leading-zero count.
    _xor(T_Dest, ThirtyOne);
  }
  if (Ty == IceType_i32) {
    _mov(Dest, T_Dest);
    return;
  }
  // 64-bit: T_Dest now holds the speculative answer assuming the
  // SecondVal word contributes 32 more zeros; add those 32 in.
  _add(T_Dest, ThirtyTwo);
  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  // Will be using "test" on this, so we need a registerized variable.
  Variable *SecondVar = legalizeToVar(SecondVal);
  Variable *T_Dest2 = makeReg(IceType_i32);
  if (Cttz) {
    _bsf(T_Dest2, SecondVar);
  } else {
    _bsr(T_Dest2, SecondVar);
    _xor(T_Dest2, ThirtyOne);
  }
  // If SecondVar was zero, the speculative answer (T_Dest) wins.
  _test(SecondVar, SecondVar);
  _cmov(T_Dest2, T_Dest, CondX86::Br_e);
  _mov(DestLo, T_Dest2);
  // The result always fits in 32 bits, so the high word is zero.
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
3550
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003551namespace {
3552
3553bool isAdd(const Inst *Inst) {
3554 if (const InstArithmetic *Arith =
3555 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3556 return (Arith->getOp() == InstArithmetic::Add);
3557 }
3558 return false;
3559}
3560
Jim Stichnoth89d79562014-08-27 13:50:03 -07003561void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3562 const Variable *Index, uint16_t Shift, int32_t Offset,
3563 const Inst *Reason) {
3564 if (!Func->getContext()->isVerbose(IceV_AddrOpt))
3565 return;
3566 Ostream &Str = Func->getContext()->getStrDump();
3567 Str << "Instruction: ";
3568 Reason->dumpDecorated(Func);
3569 Str << " results in Base=";
3570 if (Base)
3571 Base->dump(Func);
3572 else
3573 Str << "<null>";
3574 Str << ", Index=";
3575 if (Index)
3576 Index->dump(Func);
3577 else
3578 Str << "<null>";
3579 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3580}
3581
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003582bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
3583 const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003584 // Var originates from Var=SrcVar ==>
3585 // set Var:=SrcVar
3586 if (Var == NULL)
3587 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003588 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3589 assert(!VMetadata->isMultiDef(Var));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003590 if (llvm::isa<InstAssign>(VarAssign)) {
3591 Operand *SrcOp = VarAssign->getSrc(0);
3592 assert(SrcOp);
3593 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003594 if (!VMetadata->isMultiDef(SrcVar) &&
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003595 // TODO: ensure SrcVar stays single-BB
3596 true) {
3597 Var = SrcVar;
3598 Reason = VarAssign;
3599 return true;
3600 }
3601 }
3602 }
3603 }
3604 return false;
3605}
3606
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003607bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
3608 Variable *&Index, uint16_t &Shift,
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003609 const Inst *&Reason) {
3610 // Index==NULL && Base is Base=Var1+Var2 ==>
3611 // set Base=Var1, Index=Var2, Shift=0
3612 if (Base == NULL)
3613 return false;
3614 if (Index != NULL)
3615 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003616 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003617 if (BaseInst == NULL)
3618 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003619 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003620 if (BaseInst->getSrcSize() < 2)
3621 return false;
3622 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003623 if (VMetadata->isMultiDef(Var1))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003624 return false;
3625 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003626 if (VMetadata->isMultiDef(Var2))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003627 return false;
3628 if (isAdd(BaseInst) &&
3629 // TODO: ensure Var1 and Var2 stay single-BB
3630 true) {
3631 Base = Var1;
3632 Index = Var2;
3633 Shift = 0; // should already have been 0
3634 Reason = BaseInst;
3635 return true;
3636 }
3637 }
3638 }
3639 return false;
3640}
3641
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003642bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
3643 uint16_t &Shift, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003644 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3645 // Index=Var, Shift+=log2(Const)
3646 if (Index == NULL)
3647 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003648 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003649 if (IndexInst == NULL)
3650 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003651 assert(!VMetadata->isMultiDef(Index));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003652 if (IndexInst->getSrcSize() < 2)
3653 return false;
3654 if (const InstArithmetic *ArithInst =
3655 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3656 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jan Voungbc004632014-09-16 15:09:10 -07003657 if (ConstantInteger32 *Const =
3658 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003659 if (ArithInst->getOp() == InstArithmetic::Mul &&
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003660 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003661 uint64_t Mult = Const->getValue();
3662 uint32_t LogMult;
3663 switch (Mult) {
3664 case 1:
3665 LogMult = 0;
3666 break;
3667 case 2:
3668 LogMult = 1;
3669 break;
3670 case 4:
3671 LogMult = 2;
3672 break;
3673 case 8:
3674 LogMult = 3;
3675 break;
3676 default:
3677 return false;
3678 }
3679 if (Shift + LogMult <= 3) {
3680 Index = Var;
3681 Shift += LogMult;
3682 Reason = IndexInst;
3683 return true;
3684 }
3685 }
3686 }
3687 }
3688 }
3689 return false;
3690}
3691
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003692bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
3693 int32_t &Offset, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003694 // Base is Base=Var+Const || Base is Base=Const+Var ==>
3695 // set Base=Var, Offset+=Const
3696 // Base is Base=Var-Const ==>
3697 // set Base=Var, Offset-=Const
3698 if (Base == NULL)
3699 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003700 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003701 if (BaseInst == NULL)
3702 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003703 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003704 if (const InstArithmetic *ArithInst =
3705 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3706 if (ArithInst->getOp() != InstArithmetic::Add &&
3707 ArithInst->getOp() != InstArithmetic::Sub)
3708 return false;
3709 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
3710 Variable *Var = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07003711 ConstantInteger32 *Const = NULL;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003712 if (Variable *VariableOperand =
3713 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3714 Var = VariableOperand;
Jan Voungbc004632014-09-16 15:09:10 -07003715 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003716 } else if (IsAdd) {
Jan Voungbc004632014-09-16 15:09:10 -07003717 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003718 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3719 }
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003720 if (Var == NULL || Const == NULL || VMetadata->isMultiDef(Var))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003721 return false;
Jan Voungbc004632014-09-16 15:09:10 -07003722 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
Jan Voung8acded02014-09-22 18:02:25 -07003723 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
Jan Voungbc004632014-09-16 15:09:10 -07003724 return false;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003725 Base = Var;
Jan Voungbc004632014-09-16 15:09:10 -07003726 Offset += MoreOffset;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003727 Reason = BaseInst;
3728 return true;
3729 }
3730 return false;
3731}
3732
// Iteratively pattern-matches the defining instructions of Base/Index
// to fold assignments, adds, multiplies, and constant offsets into an
// x86 base+index*scale+offset addressing mode.  Base/Index/Shift/
// Offset are updated in place; the loop runs until no match* helper
// makes progress.
void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
                       Variable *&Index, uint16_t &Shift, int32_t &Offset) {
  Func->resetCurrentNode();
  if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nStarting computeAddressOpt for instruction:\n  ";
    Instr->dumpDecorated(Func);
  }
  (void)Offset; // TODO: pattern-match for non-zero offsets.
  if (Base == NULL)
    return;
  // If the Base has more than one use or is live across multiple
  // blocks, then don't go further.  Alternatively (?), never consider
  // a transformation that would change a variable that is currently
  // *not* live across basic block boundaries into one that *is*.
  if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
    return;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  bool Continue = true;
  while (Continue) {
    const Inst *Reason = NULL;
    // Try each pattern in priority order; any hit restarts the loop so
    // later patterns can apply to the rewritten operands.
    if (matchTransitiveAssign(VMetadata, Base, Reason) ||
        matchTransitiveAssign(VMetadata, Index, Reason) ||
        matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
        matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
        matchOffsetBase(VMetadata, Base, Offset, Reason)) {
      dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
    } else {
      Continue = false;
    }

    // Patterns not yet implemented:

    // Index is Index=Var<<Const && Const+Shift<=3 ==>
    //   Index=Var, Shift+=Const

    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)

    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
    //   swap(Index,Base)
    // Similar for Base=Const*Var and Base=Var<<Const

    // Index is Index=Var+Const ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Const+Var ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Var-Const ==>
    //   set Index=Var, Offset-=(Const<<Shift)

    // TODO: consider overflow issues with respect to Offset.
    // TODO: handle symbolic constants.
  }
}
3788
3789} // anonymous namespace
3790
// Lowers a load by converting it to an assignment from an
// OperandX8632Mem, first trying to fuse it into an immediately
// following arithmetic instruction that consumes (and ends the live
// range of) the loaded value.
void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand.  Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);

  // Fuse this load with a subsequent Arithmetic instruction in the
  // following situations:
  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
  //
  // TODO: Clean up and test thoroughly.
  // (E.g., if there is an mfence-all make sure the load ends up on the
  // same side of the fence).
  //
  // TODO: Why limit to Arithmetic instructions?  This could probably be
  // applied to most any instruction type.  Look at all source operands
  // in the following instruction, and if there is one instance of the
  // load instruction's dest variable, and that instruction ends that
  // variable's live range, then make the substitution.  Deal with
  // commutativity optimization in the arithmetic instruction lowering.
  InstArithmetic *NewArith = NULL;
  if (InstArithmetic *Arith =
          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
    Variable *DestLoad = Inst->getDest();
    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
    // Second-operand position: substitute the memory operand directly.
    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
        DestLoad != Src0Arith) {
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(0), Src0);
    // First-operand position: only legal for commutative ops, with the
    // operands swapped so the memory operand stays second.
    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(1), Src0);
    }
    if (NewArith) {
      // Replace the original arithmetic instruction with the fused one
      // and lower it immediately; the load disappears entirely.
      Arith->setDeleted();
      Context.advanceNext();
      lowerArithmetic(NewArith);
      return;
    }
  }

  // No fusion opportunity: lower as a plain assignment from memory.
  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}
3841
// Address-mode optimization for a load: runs computeAddressOpt on the
// load's address and, if the address simplified, replaces the load
// with one whose source is the folded base+index*scale+offset
// OperandX8632Mem.
void TargetX8632::doAddressOptLoad() {
  Inst *Inst = Context.getCur();
  Variable *Dest = Inst->getDest();
  Operand *Addr = Inst->getSrc(0);
  Variable *Index = NULL;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  // Vanilla ICE load instructions should not use the segment registers,
  // and computeAddressOpt only works at the level of Variables and Constants,
  // not other OperandX8632Mem, so there should be no mention of segment
  // registers there either.
  const OperandX8632Mem::SegmentRegisters SegmentReg =
      OperandX8632Mem::DefaultSegment;
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    // Progress was made: delete the original load and insert an
    // equivalent one using the folded memory operand.
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
    Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
                                   Shift, SegmentReg);
    Context.insert(InstLoad::create(Func, Dest, Addr));
  }
}
3865
Matt Walac3302742014-08-15 16:21:56 -07003866void TargetX8632::randomlyInsertNop(float Probability) {
3867 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3868 if (RNG.getTrueWithProbability(Probability)) {
3869 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
3870 }
3871}
3872
// Phi instructions are expected to have been eliminated before
// lowering reaches the regular instruction list; encountering one
// here is a hard error.
void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}
3876
// Lowers a ret instruction, placing the return value (if any) where
// the calling convention requires: edx:eax for i64, the x87 stack top
// (via fld) for scalar floats, xmm0 for vectors, and eax for other
// scalar integers.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
      Reg = eax;
      // Keep edx (the high half) live up to the ret.
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
    } else {
      _mov(Reg, Src0, RegX8632::Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function.  Otherwise post-call esp adjustments get dead-code
  // eliminated.  TODO: Are there more places where the fake use
  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
3903
// Lowers a select (ternary) instruction.  Three strategies are used:
// - Vector select with SSE4.1: blendvps/pblendvb with the condition
//   mask (sign-extended if needed) placed in xmm0.
// - Vector select without SSE4.1: sign extend the condition and
//   compute Dest = (SrcT & Cond) | (SrcF & ~Cond).
// - Scalar select: compare the condition against zero, assign the
//   true value, then conditionally branch over the false assignment.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(Dest->getType())) {
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
    if (InstructionSet >= SSE4_1) {
      // TODO(wala): If the condition operand is a constant, use blendps
      // or pblendw.
      //
      // Use blendvps or pblendvb to implement select.  Both read
      // their selection mask implicitly from xmm0.
      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
          SrcTy == IceType_v4f32) {
        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
        Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
        _movp(xmm0, ConditionRM);
        // blendvps selects on the sign bit of each 32-bit element, so
        // shift the i1 condition bits up into the sign-bit position.
        _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));
        _movp(T, SrcFRM);
        _blendvps(T, SrcTRM, xmm0);
        _movp(Dest, T);
      } else {
        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
        // pblendvb selects per byte, so sign extend the condition to
        // fill each element with its condition bit.
        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                              : IceType_v16i8;
        Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
        _movp(T, SrcFRM);
        _pblendvb(T, SrcTRM, xmm0);
        _movp(Dest, T);
      }
      return;
    }
    // Lower select without SSE4.1:
    // a=d?b:c ==>
    //   if elementtype(d) != i1:
    //      d=sext(d);
    //   a=(b&d)|(c&~d);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      // The condition elements are already full-width masks.
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      _movp(T, ConditionRM);
    }
    // T = Cond & SrcT;  T2 = ~Cond & SrcF;  Dest = T | T2.
    _movp(T2, T);
    _pand(T, SrcTRM);
    _pandn(T2, SrcFRM);
    _por(T, T2);
    _movp(Dest, T);

    return;
  }

  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // i64 is handled as separate lo/hi 32-bit halves.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
    _cmp(ConditionRM, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(CondX86::Br_ne, Label);
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
    // These moves may be branched over, so use the non-killable form
    // to keep liveness analysis consistent with the earlier moves.
    _mov_nonkillable(DestLo, SrcLoRI);
    _mov_nonkillable(DestHi, SrcHiRI);
  } else {
    _cmp(ConditionRM, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(CondX86::Br_ne, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    // May be branched over; see note above about _mov_nonkillable.
    _mov_nonkillable(Dest, SrcF);
  }

  Context.insert(Label);
}
3999
4000void TargetX8632::lowerStore(const InstStore *Inst) {
4001 Operand *Value = Inst->getData();
4002 Operand *Addr = Inst->getAddr();
Jan Voung5cd240d2014-06-25 10:36:46 -07004003 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
Matt Wala105b7042014-08-11 19:56:19 -07004004 Type Ty = NewAddr->getType();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004005
Matt Wala105b7042014-08-11 19:56:19 -07004006 if (Ty == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004007 Value = legalize(Value);
Jim Stichnothad403532014-09-25 12:44:17 -07004008 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4009 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004010 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4011 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
Matt Wala105b7042014-08-11 19:56:19 -07004012 } else if (isVectorType(Ty)) {
4013 _storep(legalizeToVar(Value), NewAddr);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004014 } else {
Jim Stichnothad403532014-09-25 12:44:17 -07004015 Value = legalize(Value, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004016 _store(Value, NewAddr);
4017 }
4018}
4019
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004020void TargetX8632::doAddressOptStore() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08004021 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004022 Operand *Data = Inst->getData();
4023 Operand *Addr = Inst->getAddr();
4024 Variable *Index = NULL;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004025 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004026 int32_t Offset = 0; // TODO: make Constant
4027 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004028 // Vanilla ICE store instructions should not use the segment registers,
4029 // and computeAddressOpt only works at the level of Variables and Constants,
4030 // not other OperandX8632Mem, so there should be no mention of segment
4031 // registers there either.
4032 const OperandX8632Mem::SegmentRegisters SegmentReg =
4033 OperandX8632Mem::DefaultSegment;
Jim Stichnoth89d79562014-08-27 13:50:03 -07004034 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004035 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004036 Inst->setDeleted();
Jan Voungbc004632014-09-16 15:09:10 -07004037 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004038 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004039 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004040 Context.insert(InstStore::create(Func, Data, Addr));
4041 }
4042}
4043
void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    // A 64-bit comparison value is handled as lo/hi 32-bit halves: a
    // case matches only when both halves compare equal.
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (NumCases >= 2) {
      // Multiple uses: force both halves into registers.
      Src0Lo = legalizeToVar(Src0Lo);
      Src0Hi = legalizeToVar(Src0Hi);
    } else {
      Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
    }
    for (SizeT I = 0; I < NumCases; ++I) {
      // ValueLo takes the low 32 bits of the case value; ValueHi the
      // high 32 bits.
      Constant *ValueLo = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
      Constant *ValueHi =
          Ctx->getConstantInt32(IceType_i32, Inst->getValue(I) >> 32);
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      // If the lo halves differ, skip ahead to the next case's test.
      _cmp(Src0Lo, ValueLo);
      _br(CondX86::Br_ne, Label);
      // Lo halves match, so the hi halves decide this case.
      _cmp(Src0Hi, ValueHi);
      _br(CondX86::Br_e, Inst->getLabel(I));
      Context.insert(Label);
    }
    _br(Inst->getLabelDefault());
    return;
  }
  // OK, we'll be slightly less naive by forcing Src into a physical
  // register if there are 2 or more uses.
  if (NumCases >= 2)
    Src0 = legalizeToVar(Src0);
  else
    Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
  for (SizeT I = 0; I < NumCases; ++I) {
    Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
    _cmp(Src0, Value);
    _br(CondX86::Br_e, Inst->getLabel(I));
  }

  _br(Inst->getLabelDefault());
}
4088
Matt Walaafeaee42014-08-07 13:47:30 -07004089void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4090 Variable *Dest, Operand *Src0,
4091 Operand *Src1) {
4092 assert(isVectorType(Dest->getType()));
4093 Type Ty = Dest->getType();
4094 Type ElementTy = typeElementType(Ty);
4095 SizeT NumElements = typeNumElements(Ty);
4096
4097 Operand *T = Ctx->getConstantUndef(Ty);
4098 for (SizeT I = 0; I < NumElements; ++I) {
Jan Voungbc004632014-09-16 15:09:10 -07004099 Constant *Index = Ctx->getConstantInt32(IceType_i32, I);
Matt Walaafeaee42014-08-07 13:47:30 -07004100
4101 // Extract the next two inputs.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004102 Variable *Op0 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004103 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004104 Variable *Op1 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4106
4107 // Perform the arithmetic as a scalar operation.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004108 Variable *Res = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4110
4111 // Insert the result into position.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004112 Variable *DestT = Func->makeVariable(Ty);
Matt Walaafeaee42014-08-07 13:47:30 -07004113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4114 T = DestT;
Matt Walaafeaee42014-08-07 13:47:30 -07004115 }
4116
4117 lowerAssign(InstAssign::create(Func, Dest, T));
4118}
4119
Matt Walace0ca8f2014-07-24 12:34:20 -07004120// The following pattern occurs often in lowered C and C++ code:
4121//
4122// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4123// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4124//
4125// We can eliminate the sext operation by copying the result of pcmpeqd,
4126// pcmpgtd, or cmpps (which produce sign extended results) to the result
4127// of the sext operation.
4128void
4129TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
4130 if (InstCast *NextCast =
4131 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4132 if (NextCast->getCastKind() == InstCast::Sext &&
4133 NextCast->getSrc(0) == SignExtendedResult) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004134 NextCast->setDeleted();
Matt Walace0ca8f2014-07-24 12:34:20 -07004135 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4136 // Skip over the instruction.
Matt Walace0ca8f2014-07-24 12:34:20 -07004137 Context.advanceNext();
4138 }
4139 }
4140}
4141
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004142void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
4143 const SizeT MaxSrcs = 0;
4144 Variable *Dest = NULL;
4145 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
4146 lowerCall(Call);
4147}
4148
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004149// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4150// preserve integrity of liveness analysis. Undef values are also
4151// turned into zeroes, since loOperand() and hiOperand() don't expect
4152// Undef input.
void TargetX8632::prelowerPhis() {
  CfgNode *Node = Context.getNode();
  for (InstPhi *Phi : Node->getPhis()) {
    if (Phi->isDeleted())
      continue;
    Variable *Dest = Phi->getDest();
    if (Dest->getType() == IceType_i64) {
      // Split the i64 phi into two i32 phis over the lo and hi halves
      // of the destination.
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
      InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
      for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
        Operand *Src = Phi->getSrc(I);
        CfgNode *Label = Phi->getLabel(I);
        // loOperand()/hiOperand() don't handle undef, so rewrite an
        // undef source as zero first.
        if (llvm::isa<ConstantUndef>(Src))
          Src = Ctx->getConstantZero(Dest->getType());
        PhiLo->addArgument(loOperand(Src), Label);
        PhiHi->addArgument(hiOperand(Src), Label);
      }
      // The two half-width phis replace the original i64 phi, which is
      // marked deleted rather than erased while iterating.
      Node->getPhis().push_back(PhiLo);
      Node->getPhis().push_back(PhiHi);
      Phi->setDeleted();
    }
  }
}
4178
4179namespace {
4180
4181bool isMemoryOperand(const Operand *Opnd) {
4182 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4183 return !Var->hasReg();
4184 if (llvm::isa<Constant>(Opnd))
4185 return isScalarFloatingType(Opnd->getType());
4186 return true;
4187}
4188
4189} // end of anonymous namespace
4190
4191// Lower the pre-ordered list of assignments into mov instructions.
4192// Also has to do some ad-hoc register allocation as necessary.
4193void TargetX8632::lowerPhiAssignments(CfgNode *Node,
4194 const AssignList &Assignments) {
4195 // Check that this is a properly initialized shell of a node.
4196 assert(Node->getOutEdges().size() == 1);
4197 assert(Node->getInsts().empty());
4198 assert(Node->getPhis().empty());
Jim Stichnothbfb410d2014-11-05 16:04:05 -08004199 CfgNode *Succ = Node->getOutEdges().front();
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004200 getContext().init(Node);
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004201 // Register set setup similar to regAlloc().
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004202 RegSetMask RegInclude = RegSet_All;
4203 RegSetMask RegExclude = RegSet_StackPointer;
4204 if (hasFramePointer())
4205 RegExclude |= RegSet_FramePointer;
4206 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
4207 bool NeedsRegs = false;
4208 // Initialize the set of available registers to the set of what is
4209 // available (not live) at the beginning of the successor block,
4210 // minus all registers used as Dest operands in the Assignments. To
4211 // do this, we start off assuming all registers are available, then
4212 // iterate through the Assignments and remove Dest registers.
4213 // During this iteration, we also determine whether we will actually
4214 // need any extra registers for memory-to-memory copies. If so, we
4215 // do the actual work of removing the live-in registers from the
4216 // set. TODO(stichnot): This work is being repeated for every split
4217 // edge to the successor, so consider updating LiveIn just once
4218 // after all the edges are split.
4219 for (InstAssign *Assign : Assignments) {
4220 Variable *Dest = Assign->getDest();
4221 if (Dest->hasReg()) {
4222 Available[Dest->getRegNum()] = false;
4223 } else if (isMemoryOperand(Assign->getSrc(0))) {
4224 NeedsRegs = true; // Src and Dest are both in memory
4225 }
4226 }
4227 if (NeedsRegs) {
4228 LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
4229 for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
4230 Variable *Var = Func->getLiveness()->getVariable(i, Succ);
4231 if (Var->hasReg())
4232 Available[Var->getRegNum()] = false;
4233 }
4234 }
4235 // Iterate backwards through the Assignments. After lowering each
4236 // assignment, add Dest to the set of available registers, and
4237 // remove Src from the set of available registers. Iteration is
4238 // done backwards to enable incremental updates of the available
4239 // register set, and the lowered instruction numbers may be out of
4240 // order, but that can be worked around by renumbering the block
4241 // afterwards if necessary.
4242 for (auto I = Assignments.rbegin(), E = Assignments.rend(); I != E; ++I) {
4243 Context.rewind();
4244 InstAssign *Assign = *I;
4245 Variable *Dest = Assign->getDest();
4246 Operand *Src = Assign->getSrc(0);
4247 Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
4248 // Use normal assignment lowering, except lower mem=mem specially
4249 // so we can register-allocate at the same time.
4250 if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
4251 lowerAssign(Assign);
4252 } else {
4253 assert(Dest->getType() == Src->getType());
4254 const llvm::SmallBitVector &RegsForType =
4255 getRegisterSetForType(Dest->getType());
4256 llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
4257 Variable *SpillLoc = NULL;
4258 Variable *Preg = NULL;
4259 // TODO(stichnot): Opportunity for register randomization.
4260 int32_t RegNum = AvailRegsForType.find_first();
4261 bool IsVector = isVectorType(Dest->getType());
4262 bool NeedSpill = (RegNum == -1);
4263 if (NeedSpill) {
4264 // Pick some register to spill and update RegNum.
4265 // TODO(stichnot): Opportunity for register randomization.
4266 RegNum = RegsForType.find_first();
4267 Preg = getPhysicalRegister(RegNum, Dest->getType());
4268 SpillLoc = Func->makeVariable(Dest->getType());
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004269 if (IsVector)
4270 _movp(SpillLoc, Preg);
4271 else
4272 _mov(SpillLoc, Preg);
4273 }
4274 assert(RegNum >= 0);
4275 if (llvm::isa<ConstantUndef>(Src))
4276 // Materialize an actual constant instead of undef. RegNum is
4277 // passed in for vector types because undef vectors are
4278 // lowered to vector register of zeroes.
4279 Src =
4280 legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
4281 Variable *Tmp = makeReg(Dest->getType(), RegNum);
4282 if (IsVector) {
4283 _movp(Tmp, Src);
4284 _movp(Dest, Tmp);
4285 } else {
4286 _mov(Tmp, Src);
4287 _mov(Dest, Tmp);
4288 }
4289 if (NeedSpill) {
4290 // Restore the spilled register.
4291 if (IsVector)
4292 _movp(Preg, SpillLoc);
4293 else
4294 _mov(Preg, SpillLoc);
4295 }
4296 }
4297 // Update register availability before moving to the previous
4298 // instruction on the Assignments list.
4299 if (Dest->hasReg())
4300 Available[Dest->getRegNum()] = true;
4301 if (SrcVar && SrcVar->hasReg())
4302 Available[SrcVar->getRegNum()] = false;
4303 }
4304
4305 // Add the terminator branch instruction to the end.
4306 Context.setInsertPoint(Context.end());
4307 _br(Succ);
4308}
4309
Matt Wala9a0168a2014-07-23 14:56:10 -07004310// There is no support for loading or emitting vector constants, so the
4311// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4312// etc. are initialized with register operations.
4313//
4314// TODO(wala): Add limited support for vector constants so that
4315// complex initialization in registers is unnecessary.
4316
Matt Wala83b80362014-07-16 10:21:30 -07004317Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004318 Variable *Reg = makeReg(Ty, RegNum);
4319 // Insert a FakeDef, since otherwise the live range of Reg might
4320 // be overestimated.
4321 Context.insert(InstFakeDef::create(Func, Reg));
4322 _pxor(Reg, Reg);
4323 return Reg;
4324}
4325
Matt Wala9a0168a2014-07-23 14:56:10 -07004326Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
4327 Variable *MinusOnes = makeReg(Ty, RegNum);
4328 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
4329 Context.insert(InstFakeDef::create(Func, MinusOnes));
4330 _pcmpeq(MinusOnes, MinusOnes);
4331 return MinusOnes;
4332}
4333
Matt Wala83b80362014-07-16 10:21:30 -07004334Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004335 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07004336 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07004337 _psub(Dest, MinusOne);
4338 return Dest;
4339}
4340
Matt Wala9a0168a2014-07-23 14:56:10 -07004341Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4342 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4343 Ty == IceType_v16i8);
4344 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4345 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4346 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
Jan Voungbc004632014-09-16 15:09:10 -07004347 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift));
Matt Wala9a0168a2014-07-23 14:56:10 -07004348 return Reg;
4349 } else {
4350 // SSE has no left shift operation for vectors of 8 bit integers.
4351 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4352 Constant *ConstantMask =
Jan Voungbc004632014-09-16 15:09:10 -07004353 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
Matt Wala9a0168a2014-07-23 14:56:10 -07004354 Variable *Reg = makeReg(Ty, RegNum);
4355 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4356 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4357 return Reg;
4358 }
4359}
4360
Matt Wala49889232014-07-18 12:45:09 -07004361OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4362 Variable *Slot,
4363 uint32_t Offset) {
4364 // Ensure that Loc is a stack slot.
4365 assert(Slot->getWeight() == RegWeight::Zero);
4366 assert(Slot->getRegNum() == Variable::NoRegister);
4367 // Compute the location of Loc in memory.
4368 // TODO(wala,stichnot): lea should not be required. The address of
4369 // the stack slot is known at compile time (although not until after
4370 // addProlog()).
4371 const Type PointerType = IceType_i32;
4372 Variable *Loc = makeReg(PointerType);
4373 _lea(Loc, Slot);
Jan Voungbc004632014-09-16 15:09:10 -07004374 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset);
Matt Wala49889232014-07-18 12:45:09 -07004375 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4376}
4377
Matt Wala928f1292014-07-07 16:50:46 -07004378// Helper for legalize() to emit the right code to lower an operand to a
4379// register of the appropriate type.
4380Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4381 Type Ty = Src->getType();
4382 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07004383 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07004384 _movp(Reg, Src);
4385 } else {
4386 _mov(Reg, Src);
4387 }
4388 return Reg;
4389}
4390
// Turns From into an operand of one of the kinds permitted by Allowed,
// inserting register copies as needed.  If RegNum names a specific
// physical register (in which case Allowed must be Legal_Reg only),
// the result is forced into that register.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register.  If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds.  (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Rebuild the memory operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From = OperandX8632Mem::create(
          Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
          Mem->getShift(), Mem->getSegmentRegister());
    }

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero.  Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      //
      // If in the future the implementation is changed to lower undef
      // values to uninitialized registers, a FakeDef will be needed:
      // Context.insert(InstFakeDef::create(Func, Reg));
      // This is in order to ensure that the live range of Reg is not
      // overestimated.  If the constant being lowered is a 64 bit value,
      // then the result should be split and the lo and hi components will
      // need to go in uninitialized registers.
      if (isVectorType(From->getType()))
        // Vector undef becomes a zeroed vector register.
        return makeVectorOfZeros(From->getType(), RegNum);
      From = Ctx->getConstantZero(From->getType());
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(From->getType()));
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm))
      // Immediate specifically not allowed
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(From->getType()))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister =
        (Var->hasReg() || Var->getWeight() == RegWeight::Inf);
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
4477
4478// Provide a trivial wrapper to legalize() for this common usage.
Jim Stichnothad403532014-09-25 12:44:17 -07004479Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
4480 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004481}
4482
Jan Voung5cd240d2014-06-25 10:36:46 -07004483OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
4484 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4485 // It may be the case that address mode optimization already creates
4486 // an OperandX8632Mem, so in that case it wouldn't need another level
4487 // of transformation.
4488 if (!Mem) {
4489 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4490 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4491 assert(Base || Offset);
Matt Walae3777672014-07-31 09:06:17 -07004492 if (Offset) {
Jan Voungbc004632014-09-16 15:09:10 -07004493 assert(llvm::isa<ConstantInteger32>(Offset) ||
Matt Walae3777672014-07-31 09:06:17 -07004494 llvm::isa<ConstantRelocatable>(Offset));
4495 }
Jan Voung5cd240d2014-06-25 10:36:46 -07004496 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4497 }
4498 return llvm::cast<OperandX8632Mem>(legalize(Mem));
4499}
4500
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004501Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
Jan Voung1ee34162014-06-24 13:43:30 -07004502 // There aren't any 64-bit integer registers for x86-32.
4503 assert(Type != IceType_i64);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004504 Variable *Reg = Func->makeVariable(Type);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004505 if (RegNum == Variable::NoRegister)
4506 Reg->setWeightInfinite();
4507 else
4508 Reg->setRegNum(RegNum);
4509 return Reg;
4510}
4511
4512void TargetX8632::postLower() {
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004513 if (Ctx->getOptLevel() == Opt_m1)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004514 return;
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004515 // Find two-address non-SSA instructions where Dest==Src0, and set
4516 // the DestNonKillable flag to keep liveness analysis consistent.
Jim Stichnoth607e9f02014-11-06 13:32:05 -08004517 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004518 if (Inst->isDeleted())
4519 continue;
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004520 if (Variable *Dest = Inst->getDest()) {
4521 // TODO(stichnot): We may need to consider all source
4522 // operands, not just the first one, if using 3-address
4523 // instructions.
4524 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
4525 Inst->setDestNonKillable();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004526 }
4527 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004528}
4529
Jan Voungbc004632014-09-16 15:09:10 -07004530template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
Matt Wala928f1292014-07-07 16:50:46 -07004531 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothbca2f652014-11-01 10:13:54 -07004532 Str << "$" << (int32_t)getValue();
Jan Voungbc004632014-09-16 15:09:10 -07004533}
4534
template <> void ConstantInteger64::emit(GlobalContext *) const {
  // i64 values are split into 32-bit lo/hi halves during lowering on
  // x86-32, so a 64-bit constant should never reach the emitter.
  llvm_unreachable("Not expecting to emit 64-bit integers");
}
4538
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004539template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
4540 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothbca2f652014-11-01 10:13:54 -07004541 Str << ".L$" << IceType_f32 << "$" << getPoolEntryID();
Jim Stichnothf61d5b22014-05-23 13:31:24 -07004542}
4543
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004544template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
4545 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothbca2f652014-11-01 10:13:54 -07004546 Str << ".L$" << IceType_f64 << "$" << getPoolEntryID();
Jim Stichnothf61d5b22014-05-23 13:31:24 -07004547}
4548
Matt Walae3777672014-07-31 09:06:17 -07004549void ConstantUndef::emit(GlobalContext *) const {
4550 llvm_unreachable("undef value encountered by emitter.");
4551}
4552
Jim Stichnothde4ca712014-06-29 08:13:48 -07004553TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
4554 : TargetGlobalInitLowering(Ctx) {}
4555
Karl Schimpf9d98d792014-10-13 15:01:08 -07004556void TargetGlobalInitX8632::lower(const VariableDeclaration &Var) {
Jim Stichnothde4ca712014-06-29 08:13:48 -07004557
4558 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothde4ca712014-06-29 08:13:48 -07004559
Karl Schimpf9d98d792014-10-13 15:01:08 -07004560 const VariableDeclaration::InitializerListType &Initializers =
4561 Var.getInitializers();
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004562
4563 // If external and not initialized, this must be a cross test.
4564 // Don't generate a declaration for such cases.
Jim Stichnoth088b2be2014-10-23 12:02:08 -07004565 bool IsExternal = Var.isExternal() || Ctx->getFlags().DisableInternal;
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004566 if (IsExternal && !Var.hasInitializer()) return;
4567
4568 bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
Karl Schimpf9d98d792014-10-13 15:01:08 -07004569 bool IsConstant = Var.getIsConstant();
Karl Schimpf9d98d792014-10-13 15:01:08 -07004570 uint32_t Align = Var.getAlignment();
4571 SizeT Size = Var.getNumBytes();
4572 IceString MangledName = Var.mangleName(Ctx);
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004573 IceString SectionSuffix = "";
4574 if (Ctx->getFlags().DataSections)
4575 SectionSuffix = "." + MangledName;
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004576
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004577 Str << "\t.type\t" << MangledName << ",@object\n";
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004578
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004579 if (IsConstant)
4580 Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004581 else if (HasNonzeroInitializer)
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004582 Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
4583 else if (IsExternal)
4584 Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";
4585 // No .section for non-constant + zeroinitializer + internal
4586
4587 if (IsExternal)
4588 Str << "\t.globl\t" << MangledName << "\n";
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004589 else if (!IsConstant && !HasNonzeroInitializer)
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004590 Str << "\t.local\t" << MangledName << "\n";
4591 // Internal symbols only get .local when using .comm.
4592
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004593 if ((IsConstant || HasNonzeroInitializer || IsExternal) && Align > 1)
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004594 Str << "\t.align\t" << Align << "\n";
4595 // Alignment is part of .comm.
4596
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004597 if (IsConstant || HasNonzeroInitializer || IsExternal)
Jim Stichnothde4ca712014-06-29 08:13:48 -07004598 Str << MangledName << ":\n";
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004599 else
4600 Str << "\t.comm\t" << MangledName << "," << Size << "," << Align << "\n";
4601
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004602 if (HasNonzeroInitializer) {
Karl Schimpf9d98d792014-10-13 15:01:08 -07004603 for (VariableDeclaration::Initializer *Init : Initializers) {
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004604 switch (Init->getKind()) {
Karl Schimpf9d98d792014-10-13 15:01:08 -07004605 case VariableDeclaration::Initializer::DataInitializerKind: {
4606 const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
4607 ->getContents();
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004608 for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
4609 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4610 }
4611 break;
4612 }
Karl Schimpf9d98d792014-10-13 15:01:08 -07004613 case VariableDeclaration::Initializer::ZeroInitializerKind:
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004614 Str << "\t.zero\t" << Init->getNumBytes() << "\n";
4615 break;
Karl Schimpf9d98d792014-10-13 15:01:08 -07004616 case VariableDeclaration::Initializer::RelocInitializerKind: {
4617 const auto Reloc =
4618 llvm::cast<VariableDeclaration::RelocInitializer>(Init);
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004619 Str << "\t.long\t";
Karl Schimpf9d98d792014-10-13 15:01:08 -07004620 Str << Reloc->getDeclaration()->mangleName(Ctx);
Jan Voungc0d965f2014-11-04 16:55:01 -08004621 if (RelocOffsetT Offset = Reloc->getOffset()) {
Karl Schimpf9d98d792014-10-13 15:01:08 -07004622 if (Offset >= 0 || (Offset == INT32_MIN))
4623 Str << " + " << Offset;
4624 else
4625 Str << " - " << -Offset;
Karl Schimpfe3f64d02014-10-07 10:38:22 -07004626 }
4627 Str << "\n";
4628 break;
4629 }
4630 default: {
4631 std::string Buffer;
4632 llvm::raw_string_ostream StrBuf(Buffer);
4633 StrBuf << "Unable to lower initializer: ";
4634 Init->dump(StrBuf);
4635 llvm::report_fatal_error(StrBuf.str());
4636 break;
4637 }
4638 }
Jim Stichnothde4ca712014-06-29 08:13:48 -07004639 }
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004640 } else if (IsConstant || IsExternal)
4641 Str << "\t.zero\t" << Size << "\n";
4642 // Size is part of .comm.
4643
Karl Schimpfdf6f9d12014-10-20 14:09:00 -07004644 if (IsConstant || HasNonzeroInitializer || IsExternal)
Jim Stichnoth2a063e22014-10-08 11:24:51 -07004645 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4646 // Size is part of .comm.
Jim Stichnothde4ca712014-06-29 08:13:48 -07004647}
4648
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004649} // end of namespace Ice