blob: 8082c5223d41e58e14f8ed9caecb9d51602511a1 [file] [log] [blame]
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the TargetLoweringX8632 class, which
11// consists almost entirely of the lowering sequence for each
12// high-level instruction. It also implements
13// TargetX8632Fast::postLower() which does the simplest possible
14// register allocation for the "fast" target.
15//
16//===----------------------------------------------------------------------===//
17
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070018#include "llvm/ADT/DenseMap.h"
19#include "llvm/Support/CommandLine.h"
20#include "llvm/Support/MathExtras.h"
21
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070022#include "IceCfg.h"
23#include "IceCfgNode.h"
Jim Stichnothbfb03e52014-08-26 10:29:05 -070024#include "IceClFlags.h"
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070025#include "IceDefs.h"
Karl Schimpfe3f64d02014-10-07 10:38:22 -070026#include "IceGlobalInits.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070027#include "IceInstX8632.h"
28#include "IceOperand.h"
Jan Voungbd385e42014-09-18 18:18:10 -070029#include "IceRegistersX8632.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070030#include "IceTargetLoweringX8632.def"
31#include "IceTargetLoweringX8632.h"
Jan Voung8acded02014-09-22 18:02:25 -070032#include "IceUtils.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070033
34namespace Ice {
35
36namespace {
37
Matt Walace0ca8f2014-07-24 12:34:20 -070038// The following table summarizes the logic for lowering the fcmp
39// instruction. There is one table entry for each of the 16 conditions.
40//
41// The first four columns describe the case when the operands are
42// floating point scalar values. A comment in lowerFcmp() describes the
43// lowering template. In the most general case, there is a compare
44// followed by two conditional branches, because some fcmp conditions
45// don't map to a single x86 conditional branch. However, in many cases
46// it is possible to swap the operands in the comparison and have a
47// single conditional branch. Since it's quite tedious to validate the
48// table by hand, good execution tests are helpful.
49//
50// The last two columns describe the case when the operands are vectors
51// of floating point values. For most fcmp conditions, there is a clear
52// mapping to a single x86 cmpps instruction variant. Some fcmp
53// conditions require special code to handle and these are marked in the
54// table with a Cmpps_Invalid predicate.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070055const struct TableFcmp_ {
56 uint32_t Default;
Matt Walace0ca8f2014-07-24 12:34:20 -070057 bool SwapScalarOperands;
Jan Voungbd385e42014-09-18 18:18:10 -070058 CondX86::BrCond C1, C2;
Matt Walace0ca8f2014-07-24 12:34:20 -070059 bool SwapVectorOperands;
Jan Voungbd385e42014-09-18 18:18:10 -070060 CondX86::CmppsCond Predicate;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070061} TableFcmp[] = {
Matt Walace0ca8f2014-07-24 12:34:20 -070062#define X(val, dflt, swapS, C1, C2, swapV, pred) \
Jan Voungbd385e42014-09-18 18:18:10 -070063 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070064 ,
Jim Stichnothfac55172014-10-01 13:06:21 -070065 FCMPX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070066#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -070067 };
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070068const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
69
70// The following table summarizes the logic for lowering the icmp instruction
71// for i32 and narrower types. Each icmp condition has a clear mapping to an
72// x86 conditional branch instruction.
73
// One-to-one mapping from an icmp condition to the x86 branch
// condition used for i32-and-narrower operands (see comment above).
const struct TableIcmp32_ {
  CondX86::BrCond Mapping; // the single x86 branch condition
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C_32 }                                                            \
  ,
    ICMPX8632_TABLE
#undef X
  };
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
84
85// The following table summarizes the logic for lowering the icmp instruction
86// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
87// conditional branches are needed. For the other conditions, three separate
88// conditional branches are needed.
// Branch conditions for lowering an i64 icmp (see comment above): Eq
// and Ne use two of the columns; the remaining conditions use three.
const struct TableIcmp64_ {
  CondX86::BrCond C1, C2, C3; // up to three conditional branches
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 }                           \
  ,
    ICMPX8632_TABLE
#undef X
  };
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
99
Jan Voungbd385e42014-09-18 18:18:10 -0700100CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700101 size_t Index = static_cast<size_t>(Cond);
102 assert(Index < TableIcmp32Size);
103 return TableIcmp32[Index].Mapping;
104}
105
Matt Wala49889232014-07-18 12:45:09 -0700106const struct TableTypeX8632Attributes_ {
107 Type InVectorElementType;
108} TableTypeX8632Attributes[] = {
109#define X(tag, elementty, cvt, sdss, pack, width) \
110 { elementty } \
111 ,
112 ICETYPEX8632_TABLE
113#undef X
114 };
115const size_t TableTypeX8632AttributesSize =
116 llvm::array_lengthof(TableTypeX8632Attributes);
117
118// Return the type which the elements of the vector have in the X86
119// representation of the vector.
120Type getInVectorElementType(Type Ty) {
121 assert(isVectorType(Ty));
122 size_t Index = static_cast<size_t>(Ty);
Jim Stichnoth6e992142014-07-30 14:45:20 -0700123 (void)Index;
Matt Wala49889232014-07-18 12:45:09 -0700124 assert(Index < TableTypeX8632AttributesSize);
125 return TableTypeX8632Attributes[Ty].InVectorElementType;
126}
127
Matt Wala45a06232014-07-09 16:33:22 -0700128// The maximum number of arguments to pass in XMM registers
Matt Wala105b7042014-08-11 19:56:19 -0700129const uint32_t X86_MAX_XMM_ARGS = 4;
Matt Wala83b80362014-07-16 10:21:30 -0700130// The number of bits in a byte
Matt Wala105b7042014-08-11 19:56:19 -0700131const uint32_t X86_CHAR_BIT = 8;
132// Stack alignment
133const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
134// Size of the return address on the stack
135const uint32_t X86_RET_IP_SIZE_BYTES = 4;
Matt Walad4799f42014-08-14 14:24:12 -0700136// The base 2 logarithm of the width in bytes of the smallest stack slot
137const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
138// The base 2 logarithm of the width in bytes of the largest stack slot
139const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
Matt Walac3302742014-08-15 16:21:56 -0700140// The number of different NOP instructions
141const uint32_t X86_NUM_NOP_VARIANTS = 5;
Matt Wala105b7042014-08-11 19:56:19 -0700142
Matt Walad4799f42014-08-14 14:24:12 -0700143// Value and Alignment are in bytes. Return Value adjusted to the next
144// highest multiple of Alignment.
145uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
Matt Wala105b7042014-08-11 19:56:19 -0700146 // power of 2
Matt Walad4799f42014-08-14 14:24:12 -0700147 assert((Alignment & (Alignment - 1)) == 0);
148 return (Value + Alignment - 1) & -Alignment;
149}
150
// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment (X86_STACK_ALIGNMENT_BYTES, i.e. 16).
uint32_t applyStackAlignment(uint32_t Value) {
  return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
Matt Wala45a06232014-07-09 16:33:22 -0700156
Matt Wala0a450512014-07-30 12:44:39 -0700157// Instruction set options
158namespace cl = ::llvm::cl;
Jim Stichnothfac55172014-10-01 13:06:21 -0700159cl::opt<TargetX8632::X86InstructionSet>
160CLInstructionSet("mattr", cl::desc("X86 target attributes"),
161 cl::init(TargetX8632::SSE2),
162 cl::values(clEnumValN(TargetX8632::SSE2, "sse2",
163 "Enable SSE2 instructions (default)"),
164 clEnumValN(TargetX8632::SSE4_1, "sse4.1",
165 "Enable SSE 4.1 instructions"),
166 clEnumValEnd));
Matt Wala0a450512014-07-30 12:44:39 -0700167
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700168// In some cases, there are x-macros tables for both high-level and
169// low-level instructions/operands that use the same enum key value.
170// The tables are kept separate to maintain a proper separation
Jim Stichnothfac55172014-10-01 13:06:21 -0700171// between abstraction layers. There is a risk that the tables could
172// get out of sync if enum values are reordered or if entries are
173// added or deleted. The following dummy namespaces use
174// static_asserts to ensure everything is kept in sync.
175
// Validate the enum values in FCMPX8632_TABLE.
// The whole namespace is compile-time only: it expands both the
// high-level (ICEINSTFCMP_TABLE) and low-level (FCMPX8632_TABLE)
// x-macros into parallel constants and static_asserts that their enum
// keys line up, so reordering either table breaks the build rather
// than silently mis-lowering.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE;
#undef X
} // end of namespace dummy1
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700208
Jim Stichnothfac55172014-10-01 13:06:21 -0700209// Validate the enum values in ICMPX8632_TABLE.
210namespace dummy2 {
211// Define a temporary set of enum values based on low-level table
212// entries.
213enum _tmp_enum {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700214#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
Jim Stichnothfac55172014-10-01 13:06:21 -0700215 ICMPX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700216#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700217 _num
218};
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700219// Define a set of constants based on high-level table entries.
220#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
Jim Stichnothfac55172014-10-01 13:06:21 -0700221ICEINSTICMP_TABLE;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700222#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700223// Define a set of constants based on low-level table entries, and
224// ensure the table entry keys are consistent.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700225#define X(val, C_32, C1_64, C2_64, C3_64) \
226 static const int _table2_##val = _tmp_##val; \
Jim Stichnothfac55172014-10-01 13:06:21 -0700227 static_assert( \
228 _table1_##val == _table2_##val, \
229 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
230ICMPX8632_TABLE;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700231#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700232// Repeat the static asserts with respect to the high-level table
233// entries in case the high-level table has extra entries.
234#define X(tag, str) \
235 static_assert( \
236 _table1_##tag == _table2_##tag, \
237 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
238ICEINSTICMP_TABLE;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700239#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700240} // end of namespace dummy2
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700241
Jim Stichnothfac55172014-10-01 13:06:21 -0700242// Validate the enum values in ICETYPEX8632_TABLE.
243namespace dummy3 {
244// Define a temporary set of enum values based on low-level table
245// entries.
246enum _tmp_enum {
Matt Wala49889232014-07-18 12:45:09 -0700247#define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
Jim Stichnothfac55172014-10-01 13:06:21 -0700248 ICETYPEX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700249#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700250 _num
251};
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700252// Define a set of constants based on high-level table entries.
Matt Wala928f1292014-07-07 16:50:46 -0700253#define X(tag, size, align, elts, elty, str) \
254 static const int _table1_##tag = tag;
Jim Stichnothfac55172014-10-01 13:06:21 -0700255ICETYPE_TABLE;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700256#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700257// Define a set of constants based on low-level table entries, and
258// ensure the table entry keys are consistent.
Matt Wala49889232014-07-18 12:45:09 -0700259#define X(tag, elementty, cvt, sdss, pack, width) \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700260 static const int _table2_##tag = _tmp_##tag; \
Jim Stichnothfac55172014-10-01 13:06:21 -0700261 static_assert(_table1_##tag == _table2_##tag, \
262 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
263ICETYPEX8632_TABLE;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700264#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700265// Repeat the static asserts with respect to the high-level table
266// entries in case the high-level table has extra entries.
Matt Wala928f1292014-07-07 16:50:46 -0700267#define X(tag, size, align, elts, elty, str) \
Jim Stichnothfac55172014-10-01 13:06:21 -0700268 static_assert(_table1_##tag == _table2_##tag, \
269 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
270ICETYPE_TABLE;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700271#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700272} // end of namespace dummy3
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700273
274} // end of anonymous namespace
275
// Construct the x86-32 lowering for Func: record the selected
// instruction set, then build the per-type register-class sets
// (TypeToRegisterSet) and the scratch-register set from the
// REGX8632_TABLE x-macro.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), InstructionSet(CLInstructionSet),
      IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
      SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(RegX8632::Reg_NUM)) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
  ScratchRegs.resize(RegX8632::Reg_NUM);
// Populate one bit per register from the table's isInt/isI8/isFP and
// scratch columns. Note that FloatRegisters and VectorRegisters are
// both driven by isFP: scalar FP and vector values share the xmm set.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[RegX8632::val] = isInt;                                     \
  IntegerRegistersI8[RegX8632::val] = isI8;                                    \
  FloatRegisters[RegX8632::val] = isFP;                                        \
  VectorRegisters[RegX8632::val] = isFP;                                       \
  ScratchRegs[RegX8632::val] = scratch;
  REGX8632_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
315
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700316void TargetX8632::translateO2() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700317 TimerMarker T(TimerStack::TT_O2, Func);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700318
319 // Lower Phi instructions.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700320 Func->placePhiLoads();
321 if (Func->hasError())
322 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700323 Func->placePhiStores();
324 if (Func->hasError())
325 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700326 Func->deletePhis();
327 if (Func->hasError())
328 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700329 Func->dump("After Phi lowering");
330
331 // Address mode optimization.
Jim Stichnoth877b04e2014-10-15 15:13:06 -0700332 Func->getVMetadata()->init(VMK_SingleDefs);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700333 Func->doAddressOpt();
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700334
Matt Wala45a06232014-07-09 16:33:22 -0700335 // Argument lowering
Matt Wala45a06232014-07-09 16:33:22 -0700336 Func->doArgLowering();
Matt Wala45a06232014-07-09 16:33:22 -0700337
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700338 // Target lowering. This requires liveness analysis for some parts
339 // of the lowering decisions, such as compare/branch fusing. If
340 // non-lightweight liveness analysis is used, the instructions need
341 // to be renumbered first. TODO: This renumbering should only be
342 // necessary if we're actually calculating live intervals, which we
343 // only do for register allocation.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700344 Func->renumberInstructions();
345 if (Func->hasError())
346 return;
Matt Wala45a06232014-07-09 16:33:22 -0700347
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700348 // TODO: It should be sufficient to use the fastest liveness
349 // calculation, i.e. livenessLightweight(). However, for some
350 // reason that slows down the rest of the translation. Investigate.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700351 Func->liveness(Liveness_Basic);
352 if (Func->hasError())
353 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700354 Func->dump("After x86 address mode opt");
Matt Wala45a06232014-07-09 16:33:22 -0700355
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700356 Func->genCode();
357 if (Func->hasError())
358 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700359
360 // Register allocation. This requires instruction renumbering and
361 // full liveness analysis.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700362 Func->renumberInstructions();
363 if (Func->hasError())
364 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700365 Func->liveness(Liveness_Intervals);
366 if (Func->hasError())
367 return;
Jim Stichnoth9c234e22014-10-01 09:28:21 -0700368 // Validate the live range computations. The expensive validation
369 // call is deliberately only made when assertions are enabled.
370 assert(Func->validateLiveness());
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700371 ComputedLiveRanges = true;
372 // The post-codegen dump is done here, after liveness analysis and
373 // associated cleanup, to make the dump cleaner and more useful.
374 Func->dump("After initial x8632 codegen");
Jim Stichnoth877b04e2014-10-15 15:13:06 -0700375 Func->getVMetadata()->init(VMK_All);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700376 regAlloc();
377 if (Func->hasError())
378 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700379 Func->dump("After linear scan regalloc");
380
381 // Stack frame mapping.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700382 Func->genFrame();
383 if (Func->hasError())
384 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700385 Func->dump("After stack frame mapping");
Matt Walac3302742014-08-15 16:21:56 -0700386
Jim Stichnoth98712a32014-10-24 10:59:02 -0700387 Func->deleteRedundantAssignments();
388
Jim Stichnothff9c7062014-09-18 04:50:49 -0700389 // Branch optimization. This needs to be done just before code
390 // emission. In particular, no transformations that insert or
391 // reorder CfgNodes should be done after branch optimization. We go
392 // ahead and do it before nop insertion to reduce the amount of work
393 // needed for searching for opportunities.
394 Func->doBranchOpt();
395 Func->dump("After branch optimization");
396
Matt Walac3302742014-08-15 16:21:56 -0700397 // Nop insertion
398 if (shouldDoNopInsertion()) {
399 Func->doNopInsertion();
400 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700401}
402
// Run the minimal -Om1 ("fast") translation pipeline: Phi lowering,
// argument lowering, code generation, and stack frame mapping, with
// no liveness analysis or register allocation. Each pass bails out
// early if the Cfg has recorded an error.
void TargetX8632::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->deleteRedundantAssignments();

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
435
Jim Stichnothff9c7062014-09-18 04:50:49 -0700436bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
437 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
438 return Br->optimizeBranch(NextNode);
439 }
440 return false;
441}
442
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700443IceString TargetX8632::RegNames[] = {
Jan Voungbd385e42014-09-18 18:18:10 -0700444#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700445 frameptr, isI8, isInt, isFP) \
446 name,
447 REGX8632_TABLE
448#undef X
449};
450
Jim Stichnoth98712a32014-10-24 10:59:02 -0700451Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
452 if (Ty == IceType_void)
453 Ty = IceType_i32;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700454 assert(RegNum < PhysicalRegisters.size());
455 Variable *Reg = PhysicalRegisters[RegNum];
456 if (Reg == NULL) {
Jim Stichnoth98712a32014-10-24 10:59:02 -0700457 Reg = Func->makeVariable(Ty);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700458 Reg->setRegNum(RegNum);
459 PhysicalRegisters[RegNum] = Reg;
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700460 // Specially mark esp as an "argument" so that it is considered
461 // live upon function entry.
Jim Stichnoth47752552014-10-13 17:15:08 -0700462 if (RegNum == RegX8632::Reg_esp) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700463 Func->addImplicitArg(Reg);
Jim Stichnoth47752552014-10-13 17:15:08 -0700464 Reg->setIgnoreLiveness();
465 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700466 }
467 return Reg;
468}
469
// Return the assembler name for register RegNum as used for type Ty:
// the 8-bit name for i1/i8 (e.g. "al"), the 16-bit name for i16
// (e.g. "ax"), and the 32-bit name (from RegNames) otherwise.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegX8632::Reg_NUM);
  // 8-bit register names, indexed like RegNames.
  static IceString RegNames8[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
    REGX8632_TABLE
#undef X
  };
  // 16-bit register names, indexed like RegNames.
  static IceString RegNames16[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
    REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
496
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700497void TargetX8632::emitVariable(const Variable *Var) const {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700498 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700499 if (Var->hasReg()) {
500 Str << getRegName(Var->getRegNum(), Var->getType());
501 return;
502 }
503 Str << InstX8632::getWidthString(Var->getType());
504 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
Jim Stichnothb0e142b2014-06-12 15:28:56 -0700505 int32_t Offset = Var->getStackOffset();
506 if (!hasFramePointer())
507 Offset += getStackAdjustment();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700508 if (Offset) {
509 if (Offset > 0)
510 Str << "+";
511 Str << Offset;
512 }
513 Str << "]";
514}
515
Jan Voung8acded02014-09-22 18:02:25 -0700516x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
517 assert(!Var->hasReg());
518 int32_t Offset = Var->getStackOffset();
519 if (!hasFramePointer())
520 Offset += getStackAdjustment();
521 return x86::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
522}
523
Matt Wala45a06232014-07-09 16:33:22 -0700524void TargetX8632::lowerArguments() {
525 VarList &Args = Func->getArgs();
526 // The first four arguments of vector type, regardless of their
527 // position relative to the other arguments in the argument list, are
528 // passed in registers xmm0 - xmm3.
529 unsigned NumXmmArgs = 0;
530
531 Context.init(Func->getEntryNode());
532 Context.setInsertPoint(Context.getCur());
533
534 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
535 ++I) {
536 Variable *Arg = Args[I];
537 Type Ty = Arg->getType();
538 if (!isVectorType(Ty))
539 continue;
540 // Replace Arg in the argument list with the home register. Then
541 // generate an instruction in the prolog to copy the home register
542 // to the assigned location of Arg.
Jan Voungbd385e42014-09-18 18:18:10 -0700543 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
Matt Wala45a06232014-07-09 16:33:22 -0700544 ++NumXmmArgs;
545 IceString Name = "home_reg:" + Arg->getName();
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700546 Variable *RegisterArg = Func->makeVariable(Ty, Name);
Matt Wala45a06232014-07-09 16:33:22 -0700547 RegisterArg->setRegNum(RegNum);
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700548 RegisterArg->setIsArg();
549 Arg->setIsArg(false);
Matt Wala45a06232014-07-09 16:33:22 -0700550
551 Args[I] = RegisterArg;
552 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
553 }
554}
555
Matt Walad4799f42014-08-14 14:24:12 -0700556void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
557 // Sort the variables into buckets according to the log of their width
558 // in bytes.
559 const SizeT NumBuckets =
560 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
561 VarList Buckets[NumBuckets];
562
Jim Stichnothf44f3712014-10-01 14:05:51 -0700563 for (Variable *Var : Source) {
Matt Walad4799f42014-08-14 14:24:12 -0700564 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
Jan Voung1eb3a552014-09-12 11:05:47 -0700565 SizeT LogNaturalAlignment = llvm::findFirstSet(NaturalAlignment);
Matt Walad4799f42014-08-14 14:24:12 -0700566 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
567 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
568 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
569 Buckets[BucketIndex].push_back(Var);
570 }
571
572 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
573 VarList &List = Buckets[NumBuckets - I - 1];
574 Dest.insert(Dest.end(), List.begin(), List.end());
575 }
576}
577
Matt Wala45a06232014-07-09 16:33:22 -0700578// Helper function for addProlog().
579//
580// This assumes Arg is an argument passed on the stack. This sets the
581// frame offset for Arg and updates InArgsSizeBytes according to Arg's
582// width. For an I64 arg that has been split into Lo and Hi components,
583// it calls itself recursively on the components, taking care to handle
584// Lo first because of the little-endian architecture. Lastly, this
585// function generates an instruction to copy Arg into its assigned
586// register if applicable.
587void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
588 size_t BasicFrameOffset,
589 size_t &InArgsSizeBytes) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700590 Variable *Lo = Arg->getLo();
591 Variable *Hi = Arg->getHi();
592 Type Ty = Arg->getType();
593 if (Lo && Hi && Ty == IceType_i64) {
594 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
595 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
Matt Wala45a06232014-07-09 16:33:22 -0700596 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
597 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700598 return;
599 }
Matt Wala105b7042014-08-11 19:56:19 -0700600 if (isVectorType(Ty)) {
601 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
602 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700603 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
Matt Wala45a06232014-07-09 16:33:22 -0700604 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700605 if (Arg->hasReg()) {
606 assert(Ty != IceType_i64);
607 OperandX8632Mem *Mem = OperandX8632Mem::create(
608 Func, Ty, FramePtr,
Jan Voungbc004632014-09-16 15:09:10 -0700609 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
Matt Wala45a06232014-07-09 16:33:22 -0700610 if (isVectorType(Arg->getType())) {
611 _movp(Arg, Mem);
612 } else {
613 _mov(Arg, Mem);
614 }
Jim Stichnoth18735602014-09-16 19:59:35 -0700615 // This argument-copying instruction uses an explicit
616 // OperandX8632Mem operand instead of a Variable, so its
617 // fill-from-stack operation has to be tracked separately for
618 // statistics.
619 Ctx->statsUpdateFills();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700620 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700621}
622
// On x86-32 every stack slot is addressed as a 32-bit unit.
Type TargetX8632::stackSlotType() { return IceType_i32; }
624
void TargetX8632::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES:  area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes:  area 3
  //  * GlobalsSize:            area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize:    area 6
  //  * SpillAreaSizeBytes:     areas 3 - 7

  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames.  If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks.  E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function.  In that case, basic blocks may be revisited, and variables
  // local to those basic blocks are actually live until after the
  // called function returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();
  size_t InArgsSizeBytes = 0;
  size_t PreservedRegsSizeBytes = 0;
  SpillAreaSizeBytes = 0;
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment.  This can be done as one variable per stack
  // slot.  Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  // Per-node running total of single-block ("local") spill bytes.
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass.  Compute RegsUsed, PreservedRegsSizeBytes, and
  // SpillAreaSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  VarList SpilledVariables, SortedSpilledVariables, VariablesLinkedToSpillSlots;

  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // Partition variables: registers used, args (already placed), dead
  // variables (no slot), spill-linked variables, and true spills.
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a
    // register) or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
      assert(Var->getWeight() == RegWeight::Zero);
      if (!SpillVar->getLinkedTo()->hasReg()) {
        VariablesLinkedToSpillSlots.push_back(Var);
        continue;
      }
    }
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortByAlignment(SortedSpilledVariables, SpilledVariables);
  // Sizing pass: compute area sizes and alignments.  Because the list
  // is sorted by decreasing alignment, the first variable seen in each
  // area determines that area's alignment.
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (!SpillAreaAlignmentBytes)
      SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        // The shared locals area must fit the largest single node.
        if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
          SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!LocalsSlotsAlignmentBytes)
          LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      SpillAreaSizeBytes += Increment;
    }
  }
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;

  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
  }

  // Align the variables area.  SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill
  // areas.
  uint32_t SpillAreaPaddingBytes = 0;
  if (SpillAreaAlignmentBytes) {
    assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
    uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t SpillAreaStart =
        applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
    SpillAreaSizeBytes += SpillAreaPaddingBytes;
  }

  // If there are separate globals and locals areas, make sure the
  // locals area is aligned by padding the end of the globals area.
  uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
  if (LocalsSlotsAlignmentBytes) {
    assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
    GlobalsAndSubsequentPaddingSize =
        applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(RegX8632::Reg_esp),
         Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated.  Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  unsigned NumXmmArgs = 0;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.  This second pass over the sorted
  // spilled variables mirrors the sizing pass above, but now assigns
  // concrete offsets (negative from ebp, or positive from esp).
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = GlobalsSpaceUsed;
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
  this->HasComputedFrame = true;

  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }

  // Dump the frame layout when verbose frame output is requested.
  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
893
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Scan backwards for the ret instruction; a block without one needs
  // no epilog.
  // TODO(stichnot): Use llvm::make_range with LLVM 3.5.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    // "mov esp, ebp; pop ebp" tears down the frame in one step and
    // also undoes any dynamic (alloca) esp adjustments.
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers, in the reverse order
  // of the pushes emitted by addProlog().
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    // ebp was already restored above for an ebp-based frame.
    if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}
935
// Compile-time traits used by emitConstantPool<T>() to describe how to
// emit one floating-point constant pool.  Only the float and double
// specializations below are defined; the primary template is empty.
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef float PrimitiveFpType;
  // Integer type of the same width, used to print the exact bit
  // pattern of the value.
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;     // human-readable name for asm comments
  static const char *AsmTag;       // assembler data directive for this width
  static const char *PrintfString; // snprintf format for the raw bits
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef double PrimitiveFpType;
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
963
// Emits the constant pool for type T (see PoolTypeConverter) into a
// mergeable read-only data section.  Each entry gets a local label of
// the form "L$<type>$<id>" that instruction operands can reference.
template <typename T> void TargetX8632::emitConstantPool() const {
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  // "aM" marks the section allocatable and mergeable, with the entry
  // size (here equal to the alignment) appended after @progbits.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (Constant *C : Pool) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
    typename T::PrimitiveFpType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Str << "L$" << Ty << "$" << Const->getPoolEntryID() << ":\n";
    // Emit the raw bit pattern, with the original value as a comment.
    Str << "\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
991
992void TargetX8632::emitConstants() const {
993 emitConstantPool<PoolTypeConverter<float> >();
994 emitConstantPool<PoolTypeConverter<double> >();
995
996 // No need to emit constants from the int pool since (for x86) they
997 // are embedded as immediates in the instructions.
998}
999
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001000void TargetX8632::split64(Variable *Var) {
1001 switch (Var->getType()) {
1002 default:
1003 return;
1004 case IceType_i64:
1005 // TODO: Only consider F64 if we need to push each half when
1006 // passing as an argument to a function call. Note that each half
1007 // is still typed as I32.
1008 case IceType_f64:
1009 break;
1010 }
1011 Variable *Lo = Var->getLo();
1012 Variable *Hi = Var->getHi();
1013 if (Lo) {
1014 assert(Hi);
1015 return;
1016 }
1017 assert(Hi == NULL);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07001018 Lo = Func->makeVariable(IceType_i32, Var->getName() + "__lo");
1019 Hi = Func->makeVariable(IceType_i32, Var->getName() + "__hi");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001020 Var->setLoHi(Lo, Hi);
1021 if (Var->getIsArg()) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07001022 Lo->setIsArg();
1023 Hi->setIsArg();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001024 }
1025}
1026
1027Operand *TargetX8632::loOperand(Operand *Operand) {
1028 assert(Operand->getType() == IceType_i64);
1029 if (Operand->getType() != IceType_i64)
1030 return Operand;
1031 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1032 split64(Var);
1033 return Var->getLo();
1034 }
Jan Voungbc004632014-09-16 15:09:10 -07001035 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1036 return Ctx->getConstantInt32(IceType_i32,
1037 static_cast<uint32_t>(Const->getValue()));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001038 }
1039 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1040 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1041 Mem->getOffset(), Mem->getIndex(),
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001042 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001043 }
1044 llvm_unreachable("Unsupported operand type");
1045 return NULL;
1046}
1047
1048Operand *TargetX8632::hiOperand(Operand *Operand) {
1049 assert(Operand->getType() == IceType_i64);
1050 if (Operand->getType() != IceType_i64)
1051 return Operand;
1052 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1053 split64(Var);
1054 return Var->getHi();
1055 }
Jan Voungbc004632014-09-16 15:09:10 -07001056 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1057 return Ctx->getConstantInt32(
1058 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001059 }
1060 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1061 Constant *Offset = Mem->getOffset();
1062 if (Offset == NULL)
Jan Voungbc004632014-09-16 15:09:10 -07001063 Offset = Ctx->getConstantInt32(IceType_i32, 4);
1064 else if (ConstantInteger32 *IntOffset =
1065 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1066 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001067 } else if (ConstantRelocatable *SymOffset =
1068 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
Jan Voungfe14fb82014-10-13 15:56:32 -07001069 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001070 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
1071 SymOffset->getName());
1072 }
1073 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001074 Mem->getIndex(), Mem->getShift(),
1075 Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001076 }
1077 llvm_unreachable("Unsupported operand type");
1078 return NULL;
1079}
1080
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

// For every register in the REGX8632_TABLE X-macro: first set its bit
// if it belongs to any requested Include class, then clear it if it
// belongs to any Exclude class -- so Exclude takes precedence.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
1110
void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
  // An alloca moves esp, so spill slots can no longer be addressed at
  // fixed esp offsets; switch to an ebp-based frame.
  IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned.  Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca.  All the alloca code ensures that the
  // stack alignment is preserved after the alloca.  The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert((AlignmentParam & (AlignmentParam - 1)) == 0);
  assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

  // Use at least the stack alignment; if the request exceeds it, esp
  // itself must first be realigned by masking off the low bits.
  uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
    _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment));
  }
  if (ConstantInteger32 *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Constant size: round up to the alignment at compile time.
    uint32_t Value = ConstantTotalSize->getValue();
    Value = applyAlignment(Value, Alignment);
    _sub(esp, Ctx->getConstantInt32(IceType_i32, Value));
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1));
    _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment));
    _sub(esp, T);
  }
  // The result of the alloca is the new top of stack.
  _mov(Dest, esp);
}
1153
1154void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1155 Variable *Dest = Inst->getDest();
1156 Operand *Src0 = legalize(Inst->getSrc(0));
1157 Operand *Src1 = legalize(Inst->getSrc(1));
1158 if (Dest->getType() == IceType_i64) {
1159 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1160 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1161 Operand *Src0Lo = loOperand(Src0);
1162 Operand *Src0Hi = hiOperand(Src0);
1163 Operand *Src1Lo = loOperand(Src1);
1164 Operand *Src1Hi = hiOperand(Src1);
1165 Variable *T_Lo = NULL, *T_Hi = NULL;
1166 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001167 case InstArithmetic::_num:
1168 llvm_unreachable("Unknown arithmetic operator");
1169 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001170 case InstArithmetic::Add:
1171 _mov(T_Lo, Src0Lo);
1172 _add(T_Lo, Src1Lo);
1173 _mov(DestLo, T_Lo);
1174 _mov(T_Hi, Src0Hi);
1175 _adc(T_Hi, Src1Hi);
1176 _mov(DestHi, T_Hi);
1177 break;
1178 case InstArithmetic::And:
1179 _mov(T_Lo, Src0Lo);
1180 _and(T_Lo, Src1Lo);
1181 _mov(DestLo, T_Lo);
1182 _mov(T_Hi, Src0Hi);
1183 _and(T_Hi, Src1Hi);
1184 _mov(DestHi, T_Hi);
1185 break;
1186 case InstArithmetic::Or:
1187 _mov(T_Lo, Src0Lo);
1188 _or(T_Lo, Src1Lo);
1189 _mov(DestLo, T_Lo);
1190 _mov(T_Hi, Src0Hi);
1191 _or(T_Hi, Src1Hi);
1192 _mov(DestHi, T_Hi);
1193 break;
1194 case InstArithmetic::Xor:
1195 _mov(T_Lo, Src0Lo);
1196 _xor(T_Lo, Src1Lo);
1197 _mov(DestLo, T_Lo);
1198 _mov(T_Hi, Src0Hi);
1199 _xor(T_Hi, Src1Hi);
1200 _mov(DestHi, T_Hi);
1201 break;
1202 case InstArithmetic::Sub:
1203 _mov(T_Lo, Src0Lo);
1204 _sub(T_Lo, Src1Lo);
1205 _mov(DestLo, T_Lo);
1206 _mov(T_Hi, Src0Hi);
1207 _sbb(T_Hi, Src1Hi);
1208 _mov(DestHi, T_Hi);
1209 break;
1210 case InstArithmetic::Mul: {
1211 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbd385e42014-09-18 18:18:10 -07001212 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1213 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001214 // gcc does the following:
1215 // a=b*c ==>
1216 // t1 = b.hi; t1 *=(imul) c.lo
1217 // t2 = c.hi; t2 *=(imul) b.lo
1218 // t3:eax = b.lo
1219 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1220 // a.lo = t4.lo
1221 // t4.hi += t1
1222 // t4.hi += t2
1223 // a.hi = t4.hi
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07001224 // The mul instruction cannot take an immediate operand.
1225 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001226 _mov(T_1, Src0Hi);
1227 _imul(T_1, Src1Lo);
1228 _mov(T_2, Src1Hi);
1229 _imul(T_2, Src0Lo);
Jan Voungbd385e42014-09-18 18:18:10 -07001230 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001231 _mul(T_4Lo, T_3, Src1Lo);
1232 // The mul instruction produces two dest variables, edx:eax. We
1233 // create a fake definition of edx to account for this.
1234 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1235 _mov(DestLo, T_4Lo);
1236 _add(T_4Hi, T_1);
1237 _add(T_4Hi, T_2);
1238 _mov(DestHi, T_4Hi);
1239 } break;
1240 case InstArithmetic::Shl: {
1241 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1242 // gcc does the following:
1243 // a=b<<c ==>
1244 // t1:ecx = c.lo & 0xff
1245 // t2 = b.lo
1246 // t3 = b.hi
1247 // t3 = shld t3, t2, t1
1248 // t2 = shl t2, t1
1249 // test t1, 0x20
1250 // je L1
1251 // use(t3)
1252 // t3 = t2
1253 // t2 = 0
1254 // L1:
1255 // a.lo = t2
1256 // a.hi = t3
1257 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07001258 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001259 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001260 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001261 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001262 _mov(T_2, Src0Lo);
1263 _mov(T_3, Src0Hi);
1264 _shld(T_3, T_2, T_1);
1265 _shl(T_2, T_1);
1266 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001267 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001268 // T_2 and T_3 are being assigned again because of the
1269 // intra-block control flow, so we need the _mov_nonkillable
1270 // variant to avoid liveness problems.
1271 _mov_nonkillable(T_3, T_2);
1272 _mov_nonkillable(T_2, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001273 Context.insert(Label);
1274 _mov(DestLo, T_2);
1275 _mov(DestHi, T_3);
1276 } break;
1277 case InstArithmetic::Lshr: {
1278 // a=b>>c (unsigned) ==>
1279 // t1:ecx = c.lo & 0xff
1280 // t2 = b.lo
1281 // t3 = b.hi
1282 // t2 = shrd t2, t3, t1
1283 // t3 = shr t3, t1
1284 // test t1, 0x20
1285 // je L1
1286 // use(t2)
1287 // t2 = t3
1288 // t3 = 0
1289 // L1:
1290 // a.lo = t2
1291 // a.hi = t3
1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07001293 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001294 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001295 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001296 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001297 _mov(T_2, Src0Lo);
1298 _mov(T_3, Src0Hi);
1299 _shrd(T_2, T_3, T_1);
1300 _shr(T_3, T_1);
1301 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001302 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001303 // T_2 and T_3 are being assigned again because of the
1304 // intra-block control flow, so we need the _mov_nonkillable
1305 // variant to avoid liveness problems.
1306 _mov_nonkillable(T_2, T_3);
1307 _mov_nonkillable(T_3, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001308 Context.insert(Label);
1309 _mov(DestLo, T_2);
1310 _mov(DestHi, T_3);
1311 } break;
1312 case InstArithmetic::Ashr: {
1313 // a=b>>c (signed) ==>
1314 // t1:ecx = c.lo & 0xff
1315 // t2 = b.lo
1316 // t3 = b.hi
1317 // t2 = shrd t2, t3, t1
1318 // t3 = sar t3, t1
1319 // test t1, 0x20
1320 // je L1
1321 // use(t2)
1322 // t2 = t3
1323 // t3 = sar t3, 0x1f
1324 // L1:
1325 // a.lo = t2
1326 // a.hi = t3
1327 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07001328 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1329 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001330 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001331 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001332 _mov(T_2, Src0Lo);
1333 _mov(T_3, Src0Hi);
1334 _shrd(T_2, T_3, T_1);
1335 _sar(T_3, T_1);
1336 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001337 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001338 // T_2 and T_3 are being assigned again because of the
1339 // intra-block control flow, so T_2 needs the _mov_nonkillable
1340 // variant to avoid liveness problems. T_3 doesn't need special
1341 // treatment because it is reassigned via _sar instead of _mov.
1342 _mov_nonkillable(T_2, T_3);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001343 _sar(T_3, SignExtend);
1344 Context.insert(Label);
1345 _mov(DestLo, T_2);
1346 _mov(DestHi, T_3);
1347 } break;
1348 case InstArithmetic::Udiv: {
1349 const SizeT MaxSrcs = 2;
1350 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
1351 Call->addArg(Inst->getSrc(0));
1352 Call->addArg(Inst->getSrc(1));
1353 lowerCall(Call);
1354 } break;
1355 case InstArithmetic::Sdiv: {
1356 const SizeT MaxSrcs = 2;
1357 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
1358 Call->addArg(Inst->getSrc(0));
1359 Call->addArg(Inst->getSrc(1));
1360 lowerCall(Call);
1361 } break;
1362 case InstArithmetic::Urem: {
1363 const SizeT MaxSrcs = 2;
1364 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
1365 Call->addArg(Inst->getSrc(0));
1366 Call->addArg(Inst->getSrc(1));
1367 lowerCall(Call);
1368 } break;
1369 case InstArithmetic::Srem: {
1370 const SizeT MaxSrcs = 2;
1371 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
1372 Call->addArg(Inst->getSrc(0));
1373 Call->addArg(Inst->getSrc(1));
1374 lowerCall(Call);
1375 } break;
1376 case InstArithmetic::Fadd:
1377 case InstArithmetic::Fsub:
1378 case InstArithmetic::Fmul:
1379 case InstArithmetic::Fdiv:
1380 case InstArithmetic::Frem:
1381 llvm_unreachable("FP instruction with i64 type");
1382 break;
1383 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001384 } else if (isVectorType(Dest->getType())) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001385 // TODO: Trap on integer divide and integer modulo by zero.
1386 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
Matt Wala8d1072e2014-07-11 15:43:51 -07001387 switch (Inst->getOp()) {
1388 case InstArithmetic::_num:
1389 llvm_unreachable("Unknown arithmetic operator");
1390 break;
Matt Wala7fa22d82014-07-17 12:41:31 -07001391 case InstArithmetic::Add: {
1392 Variable *T = makeReg(Dest->getType());
1393 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001394 _padd(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001395 _movp(Dest, T);
1396 } break;
1397 case InstArithmetic::And: {
1398 Variable *T = makeReg(Dest->getType());
1399 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001400 _pand(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001401 _movp(Dest, T);
1402 } break;
1403 case InstArithmetic::Or: {
1404 Variable *T = makeReg(Dest->getType());
1405 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001406 _por(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001407 _movp(Dest, T);
1408 } break;
1409 case InstArithmetic::Xor: {
1410 Variable *T = makeReg(Dest->getType());
1411 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001412 _pxor(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001413 _movp(Dest, T);
1414 } break;
1415 case InstArithmetic::Sub: {
1416 Variable *T = makeReg(Dest->getType());
1417 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001418 _psub(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001419 _movp(Dest, T);
1420 } break;
1421 case InstArithmetic::Mul: {
Matt Wala0a450512014-07-30 12:44:39 -07001422 bool TypesAreValidForPmull =
1423 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1424 bool InstructionSetIsValidForPmull =
1425 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1426 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1427 Variable *T = makeReg(Dest->getType());
1428 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001429 _pmull(T, Src1);
Matt Wala0a450512014-07-30 12:44:39 -07001430 _movp(Dest, T);
1431 } else if (Dest->getType() == IceType_v4i32) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001432 // Lowering sequence:
1433 // Note: The mask arguments have index 0 on the left.
1434 //
1435 // movups T1, Src0
1436 // pshufd T2, Src0, {1,0,3,0}
1437 // pshufd T3, Src1, {1,0,3,0}
1438 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1439 // pmuludq T1, Src1
1440 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1441 // pmuludq T2, T3
1442 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1443 // shufps T1, T2, {0,2,0,2}
1444 // pshufd T4, T1, {0,2,1,3}
1445 // movups Dest, T4
Matt Wala7fa22d82014-07-17 12:41:31 -07001446
1447 // Mask that directs pshufd to create a vector with entries
1448 // Src[1, 0, 3, 0]
1449 const unsigned Constant1030 = 0x31;
Jan Voungbc004632014-09-16 15:09:10 -07001450 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030);
Matt Wala7fa22d82014-07-17 12:41:31 -07001451 // Mask that directs shufps to create a vector with entries
1452 // Dest[0, 2], Src[0, 2]
1453 const unsigned Mask0202 = 0x88;
1454 // Mask that directs pshufd to create a vector with entries
1455 // Src[0, 2, 1, 3]
1456 const unsigned Mask0213 = 0xd8;
1457 Variable *T1 = makeReg(IceType_v4i32);
1458 Variable *T2 = makeReg(IceType_v4i32);
1459 Variable *T3 = makeReg(IceType_v4i32);
1460 Variable *T4 = makeReg(IceType_v4i32);
1461 _movp(T1, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001462 _pshufd(T2, Src0, Mask1030);
1463 _pshufd(T3, Src1, Mask1030);
1464 _pmuludq(T1, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001465 _pmuludq(T2, T3);
Jan Voungbc004632014-09-16 15:09:10 -07001466 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202));
1467 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213));
Matt Wala7fa22d82014-07-17 12:41:31 -07001468 _movp(Dest, T4);
Matt Wala7fa22d82014-07-17 12:41:31 -07001469 } else {
1470 assert(Dest->getType() == IceType_v16i8);
Matt Walaafeaee42014-08-07 13:47:30 -07001471 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001472 }
1473 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001474 case InstArithmetic::Shl:
1475 case InstArithmetic::Lshr:
1476 case InstArithmetic::Ashr:
1477 case InstArithmetic::Udiv:
1478 case InstArithmetic::Urem:
1479 case InstArithmetic::Sdiv:
1480 case InstArithmetic::Srem:
1481 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1482 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001483 case InstArithmetic::Fadd: {
1484 Variable *T = makeReg(Dest->getType());
1485 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001486 _addps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001487 _movp(Dest, T);
1488 } break;
1489 case InstArithmetic::Fsub: {
1490 Variable *T = makeReg(Dest->getType());
1491 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001492 _subps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001493 _movp(Dest, T);
1494 } break;
1495 case InstArithmetic::Fmul: {
1496 Variable *T = makeReg(Dest->getType());
1497 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001498 _mulps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001499 _movp(Dest, T);
1500 } break;
1501 case InstArithmetic::Fdiv: {
1502 Variable *T = makeReg(Dest->getType());
1503 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001504 _divps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001505 _movp(Dest, T);
1506 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001507 case InstArithmetic::Frem:
1508 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1509 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001510 }
1511 } else { // Dest->getType() is non-i64 scalar
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001512 Variable *T_edx = NULL;
1513 Variable *T = NULL;
1514 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001515 case InstArithmetic::_num:
1516 llvm_unreachable("Unknown arithmetic operator");
1517 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001518 case InstArithmetic::Add:
1519 _mov(T, Src0);
1520 _add(T, Src1);
1521 _mov(Dest, T);
1522 break;
1523 case InstArithmetic::And:
1524 _mov(T, Src0);
1525 _and(T, Src1);
1526 _mov(Dest, T);
1527 break;
1528 case InstArithmetic::Or:
1529 _mov(T, Src0);
1530 _or(T, Src1);
1531 _mov(Dest, T);
1532 break;
1533 case InstArithmetic::Xor:
1534 _mov(T, Src0);
1535 _xor(T, Src1);
1536 _mov(Dest, T);
1537 break;
1538 case InstArithmetic::Sub:
1539 _mov(T, Src0);
1540 _sub(T, Src1);
1541 _mov(Dest, T);
1542 break;
1543 case InstArithmetic::Mul:
1544 // TODO: Optimize for llvm::isa<Constant>(Src1)
1545 // TODO: Strength-reduce multiplications by a constant,
1546 // particularly -1 and powers of 2. Advanced: use lea to
1547 // multiply by 3, 5, 9.
1548 //
1549 // The 8-bit version of imul only allows the form "imul r/m8"
1550 // where T must be in eax.
Jan Voung0ac50dc2014-09-30 08:36:06 -07001551 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001552 _mov(T, Src0, RegX8632::Reg_eax);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001553 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1554 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001555 _mov(T, Src0);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001556 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001557 _imul(T, Src1);
1558 _mov(Dest, T);
1559 break;
1560 case InstArithmetic::Shl:
1561 _mov(T, Src0);
1562 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001563 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001564 _shl(T, Src1);
1565 _mov(Dest, T);
1566 break;
1567 case InstArithmetic::Lshr:
1568 _mov(T, Src0);
1569 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001570 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001571 _shr(T, Src1);
1572 _mov(Dest, T);
1573 break;
1574 case InstArithmetic::Ashr:
1575 _mov(T, Src0);
1576 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001577 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001578 _sar(T, Src1);
1579 _mov(Dest, T);
1580 break;
1581 case InstArithmetic::Udiv:
Jan Voung70d68832014-06-17 10:02:37 -07001582 // div and idiv are the few arithmetic operators that do not allow
1583 // immediates as the operand.
1584 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001585 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001586 Variable *T_ah = NULL;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001587 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001588 _mov(T, Src0, RegX8632::Reg_eax);
1589 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001590 _div(T, Src1, T_ah);
1591 _mov(Dest, T);
1592 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001593 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001594 _mov(T, Src0, RegX8632::Reg_eax);
1595 _mov(T_edx, Zero, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001596 _div(T, Src1, T_edx);
1597 _mov(Dest, T);
1598 }
1599 break;
1600 case InstArithmetic::Sdiv:
Jan Voung70d68832014-06-17 10:02:37 -07001601 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001602 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001603 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001604 _cbwdq(T, T);
1605 _idiv(T, Src1, T);
1606 _mov(Dest, T);
1607 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001608 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1609 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001610 _cbwdq(T_edx, T);
1611 _idiv(T, Src1, T_edx);
1612 _mov(Dest, T);
1613 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001614 break;
1615 case InstArithmetic::Urem:
Jan Voung70d68832014-06-17 10:02:37 -07001616 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001617 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001618 Variable *T_ah = NULL;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001619 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001620 _mov(T, Src0, RegX8632::Reg_eax);
1621 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001622 _div(T_ah, Src1, T);
1623 _mov(Dest, T_ah);
1624 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001625 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001626 _mov(T_edx, Zero, RegX8632::Reg_edx);
1627 _mov(T, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001628 _div(T_edx, Src1, T);
1629 _mov(Dest, T_edx);
1630 }
1631 break;
1632 case InstArithmetic::Srem:
Jan Voung70d68832014-06-17 10:02:37 -07001633 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001634 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001635 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1636 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001637 _cbwdq(T, T);
1638 Context.insert(InstFakeDef::create(Func, T_ah));
1639 _idiv(T_ah, Src1, T);
1640 _mov(Dest, T_ah);
1641 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001642 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1643 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001644 _cbwdq(T_edx, T);
1645 _idiv(T_edx, Src1, T);
1646 _mov(Dest, T_edx);
1647 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001648 break;
1649 case InstArithmetic::Fadd:
1650 _mov(T, Src0);
1651 _addss(T, Src1);
1652 _mov(Dest, T);
1653 break;
1654 case InstArithmetic::Fsub:
1655 _mov(T, Src0);
1656 _subss(T, Src1);
1657 _mov(Dest, T);
1658 break;
1659 case InstArithmetic::Fmul:
1660 _mov(T, Src0);
1661 _mulss(T, Src1);
1662 _mov(Dest, T);
1663 break;
1664 case InstArithmetic::Fdiv:
1665 _mov(T, Src0);
1666 _divss(T, Src1);
1667 _mov(Dest, T);
1668 break;
1669 case InstArithmetic::Frem: {
1670 const SizeT MaxSrcs = 2;
1671 Type Ty = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07001672 InstCall *Call = makeHelperCall(
1673 isFloat32Asserting32Or64(Ty) ? "fmodf" : "fmod", Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001674 Call->addArg(Src0);
1675 Call->addArg(Src1);
1676 return lowerCall(Call);
1677 } break;
1678 }
1679 }
1680}
1681
1682void TargetX8632::lowerAssign(const InstAssign *Inst) {
1683 Variable *Dest = Inst->getDest();
1684 Operand *Src0 = Inst->getSrc(0);
1685 assert(Dest->getType() == Src0->getType());
1686 if (Dest->getType() == IceType_i64) {
1687 Src0 = legalize(Src0);
1688 Operand *Src0Lo = loOperand(Src0);
1689 Operand *Src0Hi = hiOperand(Src0);
1690 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1691 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1692 Variable *T_Lo = NULL, *T_Hi = NULL;
1693 _mov(T_Lo, Src0Lo);
1694 _mov(DestLo, T_Lo);
1695 _mov(T_Hi, Src0Hi);
1696 _mov(DestHi, T_Hi);
1697 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001698 // RI is either a physical register or an immediate.
Jim Stichnothad403532014-09-25 12:44:17 -07001699 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm);
Matt Wala45a06232014-07-09 16:33:22 -07001700 if (isVectorType(Dest->getType()))
1701 _movp(Dest, RI);
1702 else
1703 _mov(Dest, RI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001704 }
1705}
1706
1707void TargetX8632::lowerBr(const InstBr *Inst) {
1708 if (Inst->isUnconditional()) {
1709 _br(Inst->getTargetUnconditional());
1710 } else {
Jim Stichnoth206833c2014-08-07 10:58:05 -07001711 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001712 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001713 _cmp(Src0, Zero);
Jan Voungbd385e42014-09-18 18:18:10 -07001714 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001715 }
1716}
1717
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Lowers a call instruction: classifies arguments into xmm registers
  // vs. stack slots, adjusts esp for the argument area, stores stack
  // arguments, moves register arguments into place, emits the call
  // (plus the fake def/use/kill pseudo-instructions needed for correct
  // liveness), restores esp, and finally captures the return value.
  //
  // x86-32 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers xmm0 - xmm3.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 4 bytes.
  //
  // This intends to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
  // Mark the function as containing a call so the alignment
  // requirement above can be honored by frame setup elsewhere.
  NeedsStackAlignment = true;

  OperandList XmmArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      // Vector stack arguments start at a 16-byte-aligned offset.
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
      }
      // Record the esp-relative location for this argument now; the
      // actual store happens after the stack adjustment below.
      Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
      Constant *Loc =
          Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
      StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
    // TODO: Consider calling postLower() here to reduce the register
    // pressure associated with using too many infinite weight
    // temporaries when lowering the call sequence in -Om1 mode.
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    // Vector args go in xmm0..xmm3, in argument-list order.
    Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = NULL;
  Variable *ReturnRegHi = NULL;
  if (Dest) {
    // Choose the physical register(s) that hold the return value,
    // based on the destination type: eax for scalar ints, edx:eax for
    // i64, xmm0 for vectors, and st(0) (handled below) for scalar FP.
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
      break;
    }
  }
  // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
  // a proper emitter is used.
  Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
  Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  // The call only names ReturnReg (eax) as its dest; fake-define the
  // upper half (edx) of an i64 return so liveness sees it written.
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  // This models the call clobbering every scratch register whose bit
  // is set in ScratchRegs.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (isScalarFloatingType(Dest->getType())) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used,
    // because st(0) still needs to be popped.
    Context.insert(InstFakeUse::create(Func, Dest));
  }
}
1908
1909void TargetX8632::lowerCast(const InstCast *Inst) {
1910 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1911 InstCast::OpKind CastKind = Inst->getCastKind();
1912 Variable *Dest = Inst->getDest();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001913 switch (CastKind) {
1914 default:
1915 Func->setError("Cast type not supported");
1916 return;
Jan Voung1ee34162014-06-24 13:43:30 -07001917 case InstCast::Sext: {
1918 // Src0RM is the source operand legalized to physical register or memory,
1919 // but not immediate, since the relevant x86 native instructions don't
1920 // allow an immediate operand. If the operand is an immediate, we could
1921 // consider computing the strength-reduced result at translation time,
1922 // but we're unlikely to see something like that in the bitcode that
1923 // the optimizer wouldn't have already taken care of.
1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001925 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07001926 Type DestTy = Dest->getType();
1927 if (DestTy == IceType_v16i8) {
1928 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
1929 Variable *OneMask = makeVectorOfOnes(Dest->getType());
1930 Variable *T = makeReg(DestTy);
1931 _movp(T, Src0RM);
1932 _pand(T, OneMask);
1933 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1934 _pcmpgt(T, Zeros);
1935 _movp(Dest, T);
1936 } else {
1937 // width = width(elty) - 1; dest = (src << width) >> width
1938 SizeT ShiftAmount =
1939 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
Jan Voungbc004632014-09-16 15:09:10 -07001940 Constant *ShiftConstant =
1941 Ctx->getConstantInt32(IceType_i8, ShiftAmount);
Matt Wala83b80362014-07-16 10:21:30 -07001942 Variable *T = makeReg(DestTy);
1943 _movp(T, Src0RM);
1944 _psll(T, ShiftConstant);
1945 _psra(T, ShiftConstant);
1946 _movp(Dest, T);
1947 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07001948 } else if (Dest->getType() == IceType_i64) {
1949 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Jan Voungbc004632014-09-16 15:09:10 -07001950 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001951 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1952 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1953 Variable *T_Lo = makeReg(DestLo->getType());
1954 if (Src0RM->getType() == IceType_i32) {
1955 _mov(T_Lo, Src0RM);
1956 } else if (Src0RM->getType() == IceType_i1) {
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001957 _movzx(T_Lo, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001958 _shl(T_Lo, Shift);
1959 _sar(T_Lo, Shift);
1960 } else {
1961 _movsx(T_Lo, Src0RM);
1962 }
1963 _mov(DestLo, T_Lo);
1964 Variable *T_Hi = NULL;
1965 _mov(T_Hi, T_Lo);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001966 if (Src0RM->getType() != IceType_i1)
1967 // For i1, the sar instruction is already done above.
1968 _sar(T_Hi, Shift);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001969 _mov(DestHi, T_Hi);
1970 } else if (Src0RM->getType() == IceType_i1) {
1971 // t1 = src
1972 // shl t1, dst_bitwidth - 1
1973 // sar t1, dst_bitwidth - 1
1974 // dst = t1
1975 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
Jan Voungbc004632014-09-16 15:09:10 -07001976 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001977 Variable *T = makeReg(Dest->getType());
1978 if (typeWidthInBytes(Dest->getType()) <=
1979 typeWidthInBytes(Src0RM->getType())) {
1980 _mov(T, Src0RM);
1981 } else {
1982 // Widen the source using movsx or movzx. (It doesn't matter
1983 // which one, since the following shl/sar overwrite the bits.)
1984 _movzx(T, Src0RM);
1985 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07001986 _shl(T, ShiftAmount);
1987 _sar(T, ShiftAmount);
1988 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001989 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001990 // t1 = movsx src; dst = t1
1991 Variable *T = makeReg(Dest->getType());
1992 _movsx(T, Src0RM);
1993 _mov(Dest, T);
1994 }
1995 break;
Jan Voung1ee34162014-06-24 13:43:30 -07001996 }
1997 case InstCast::Zext: {
1998 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001999 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07002000 // onemask = materialize(1,1,...); dest = onemask & src
2001 Type DestTy = Dest->getType();
2002 Variable *OneMask = makeVectorOfOnes(DestTy);
2003 Variable *T = makeReg(DestTy);
2004 _movp(T, Src0RM);
2005 _pand(T, OneMask);
2006 _movp(Dest, T);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002007 } else if (Dest->getType() == IceType_i64) {
2008 // t1=movzx src; dst.lo=t1; dst.hi=0
2009 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2012 Variable *Tmp = makeReg(DestLo->getType());
2013 if (Src0RM->getType() == IceType_i32) {
2014 _mov(Tmp, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002015 } else {
2016 _movzx(Tmp, Src0RM);
2017 }
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002018 if (Src0RM->getType() == IceType_i1) {
Jan Voungbc004632014-09-16 15:09:10 -07002019 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002020 _and(Tmp, One);
2021 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002022 _mov(DestLo, Tmp);
2023 _mov(DestHi, Zero);
2024 } else if (Src0RM->getType() == IceType_i1) {
2025 // t = Src0RM; t &= 1; Dest = t
Jan Voungbc004632014-09-16 15:09:10 -07002026 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
Jan Voung39d4aca2014-10-15 15:16:54 -07002027 Type DestTy = Dest->getType();
2028 Variable *T;
2029 if (DestTy == IceType_i8) {
2030 T = makeReg(DestTy);
2031 _mov(T, Src0RM);
2032 } else {
2033 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2034 T = makeReg(IceType_i32);
2035 _movzx(T, Src0RM);
2036 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002037 _and(T, One);
2038 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002039 } else {
2040 // t1 = movzx src; dst = t1
2041 Variable *T = makeReg(Dest->getType());
2042 _movzx(T, Src0RM);
2043 _mov(Dest, T);
2044 }
2045 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002046 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002047 case InstCast::Trunc: {
Matt Wala83b80362014-07-16 10:21:30 -07002048 if (isVectorType(Dest->getType())) {
2049 // onemask = materialize(1,1,...); dst = src & onemask
2050 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2051 Type Src0Ty = Src0RM->getType();
2052 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2053 Variable *T = makeReg(Dest->getType());
2054 _movp(T, Src0RM);
2055 _pand(T, OneMask);
2056 _movp(Dest, T);
2057 } else {
2058 Operand *Src0 = Inst->getSrc(0);
2059 if (Src0->getType() == IceType_i64)
2060 Src0 = loOperand(Src0);
2061 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2062 // t1 = trunc Src0RM; Dest = t1
2063 Variable *T = NULL;
2064 _mov(T, Src0RM);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002065 if (Dest->getType() == IceType_i1)
Jan Voungbc004632014-09-16 15:09:10 -07002066 _and(T, Ctx->getConstantInt32(IceType_i1, 1));
Matt Wala83b80362014-07-16 10:21:30 -07002067 _mov(Dest, T);
2068 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002069 break;
2070 }
2071 case InstCast::Fptrunc:
2072 case InstCast::Fpext: {
Jan Voung1ee34162014-06-24 13:43:30 -07002073 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002074 // t1 = cvt Src0RM; Dest = t1
2075 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002076 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002077 _mov(Dest, T);
2078 break;
2079 }
2080 case InstCast::Fptosi:
Matt Wala83b80362014-07-16 10:21:30 -07002081 if (isVectorType(Dest->getType())) {
2082 assert(Dest->getType() == IceType_v4i32 &&
2083 Inst->getSrc(0)->getType() == IceType_v4f32);
2084 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2085 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002086 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
Matt Wala83b80362014-07-16 10:21:30 -07002087 _movp(Dest, T);
2088 } else if (Dest->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002089 // Use a helper for converting floating-point values to 64-bit
2090 // integers. SSE2 appears to have no way to convert from xmm
2091 // registers to something like the edx:eax register pair, and
2092 // gcc and clang both want to use x87 instructions complete with
2093 // temporary manipulation of the status word. This helper is
2094 // not needed for x86-64.
2095 split64(Dest);
2096 const SizeT MaxSrcs = 1;
2097 Type SrcType = Inst->getSrc(0)->getType();
2098 InstCall *Call = makeHelperCall(
Jan Voung3a569182014-09-29 10:16:01 -07002099 isFloat32Asserting32Or64(SrcType) ? "cvtftosi64" : "cvtdtosi64", Dest,
2100 MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002101 // TODO: Call the correct compiler-rt helper function.
2102 Call->addArg(Inst->getSrc(0));
2103 lowerCall(Call);
2104 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002105 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002106 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2107 Variable *T_1 = makeReg(IceType_i32);
2108 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002109 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002110 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002111 if (Dest->getType() == IceType_i1)
Jan Voungbc004632014-09-16 15:09:10 -07002112 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002113 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002114 }
2115 break;
2116 case InstCast::Fptoui:
Matt Wala83b80362014-07-16 10:21:30 -07002117 if (isVectorType(Dest->getType())) {
2118 assert(Dest->getType() == IceType_v4i32 &&
2119 Inst->getSrc(0)->getType() == IceType_v4f32);
2120 const SizeT MaxSrcs = 1;
2121 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
2122 Call->addArg(Inst->getSrc(0));
2123 lowerCall(Call);
2124 } else if (Dest->getType() == IceType_i64 ||
2125 Dest->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002126 // Use a helper for both x86-32 and x86-64.
2127 split64(Dest);
2128 const SizeT MaxSrcs = 1;
2129 Type DestType = Dest->getType();
Jan Voung1ee34162014-06-24 13:43:30 -07002130 Type SrcType = Inst->getSrc(0)->getType();
Jan Voung3a569182014-09-29 10:16:01 -07002131 IceString DstSubstring = (isInt32Asserting32Or64(DestType) ? "32" : "64");
2132 IceString SrcSubstring = (isFloat32Asserting32Or64(SrcType) ? "f" : "d");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002133 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
2134 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
2135 // TODO: Call the correct compiler-rt helper function.
2136 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2137 Call->addArg(Inst->getSrc(0));
2138 lowerCall(Call);
2139 return;
2140 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002141 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002142 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2143 Variable *T_1 = makeReg(IceType_i32);
2144 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002145 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002146 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002147 if (Dest->getType() == IceType_i1)
Jan Voungbc004632014-09-16 15:09:10 -07002148 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002149 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002150 }
2151 break;
2152 case InstCast::Sitofp:
Matt Wala83b80362014-07-16 10:21:30 -07002153 if (isVectorType(Dest->getType())) {
2154 assert(Dest->getType() == IceType_v4f32 &&
2155 Inst->getSrc(0)->getType() == IceType_v4i32);
2156 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2157 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002158 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
Matt Wala83b80362014-07-16 10:21:30 -07002159 _movp(Dest, T);
2160 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002161 // Use a helper for x86-32.
2162 const SizeT MaxSrcs = 1;
2163 Type DestType = Dest->getType();
2164 InstCall *Call = makeHelperCall(
Jan Voung3a569182014-09-29 10:16:01 -07002165 isFloat32Asserting32Or64(DestType) ? "cvtsi64tof" : "cvtsi64tod",
2166 Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002167 // TODO: Call the correct compiler-rt helper function.
2168 Call->addArg(Inst->getSrc(0));
2169 lowerCall(Call);
2170 return;
2171 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002172 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002173 // Sign-extend the operand.
2174 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2175 Variable *T_1 = makeReg(IceType_i32);
2176 Variable *T_2 = makeReg(Dest->getType());
2177 if (Src0RM->getType() == IceType_i32)
2178 _mov(T_1, Src0RM);
2179 else
2180 _movsx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002181 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002182 _mov(Dest, T_2);
2183 }
2184 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002185 case InstCast::Uitofp: {
2186 Operand *Src0 = Inst->getSrc(0);
Matt Wala83b80362014-07-16 10:21:30 -07002187 if (isVectorType(Src0->getType())) {
2188 assert(Dest->getType() == IceType_v4f32 &&
2189 Src0->getType() == IceType_v4i32);
2190 const SizeT MaxSrcs = 1;
2191 InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
2192 Call->addArg(Src0);
2193 lowerCall(Call);
2194 } else if (Src0->getType() == IceType_i64 ||
2195 Src0->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002196 // Use a helper for x86-32 and x86-64. Also use a helper for
2197 // i32 on x86-32.
2198 const SizeT MaxSrcs = 1;
2199 Type DestType = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07002200 IceString SrcSubstring =
2201 (isInt32Asserting32Or64(Src0->getType()) ? "32" : "64");
2202 IceString DstSubstring = (isFloat32Asserting32Or64(DestType) ? "f" : "d");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002203 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
2204 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
2205 // TODO: Call the correct compiler-rt helper function.
2206 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
Jan Voung1ee34162014-06-24 13:43:30 -07002207 Call->addArg(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002208 lowerCall(Call);
2209 return;
2210 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002211 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002212 // Zero-extend the operand.
2213 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2214 Variable *T_1 = makeReg(IceType_i32);
2215 Variable *T_2 = makeReg(Dest->getType());
2216 if (Src0RM->getType() == IceType_i32)
2217 _mov(T_1, Src0RM);
2218 else
2219 _movzx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002220 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002221 _mov(Dest, T_2);
2222 }
2223 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002224 }
2225 case InstCast::Bitcast: {
2226 Operand *Src0 = Inst->getSrc(0);
2227 if (Dest->getType() == Src0->getType()) {
2228 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002229 lowerAssign(Assign);
2230 return;
2231 }
2232 switch (Dest->getType()) {
2233 default:
2234 llvm_unreachable("Unexpected Bitcast dest type");
Matt Wala83b80362014-07-16 10:21:30 -07002235 case IceType_i8: {
2236 assert(Src0->getType() == IceType_v8i1);
2237 InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
2238 Call->addArg(Src0);
2239 lowerCall(Call);
2240 } break;
2241 case IceType_i16: {
2242 assert(Src0->getType() == IceType_v16i1);
2243 InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
2244 Call->addArg(Src0);
2245 lowerCall(Call);
2246 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002247 case IceType_i32:
2248 case IceType_f32: {
Jan Voung1ee34162014-06-24 13:43:30 -07002249 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002250 Type DestType = Dest->getType();
2251 Type SrcType = Src0RM->getType();
Jim Stichnoth6e992142014-07-30 14:45:20 -07002252 (void)DestType;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002253 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2254 (DestType == IceType_f32 && SrcType == IceType_i32));
2255 // a.i32 = bitcast b.f32 ==>
2256 // t.f32 = b.f32
2257 // s.f32 = spill t.f32
2258 // a.i32 = s.f32
2259 Variable *T = NULL;
2260 // TODO: Should be able to force a spill setup by calling legalize() with
2261 // Legal_Mem and not Legal_Reg or Legal_Imm.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002262 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002263 SpillVar->setLinkedTo(Dest);
2264 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002265 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002266 _mov(T, Src0RM);
2267 _mov(Spill, T);
2268 _mov(Dest, Spill);
2269 } break;
2270 case IceType_i64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002271 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002272 assert(Src0RM->getType() == IceType_f64);
2273 // a.i64 = bitcast b.f64 ==>
2274 // s.f64 = spill b.f64
2275 // t_lo.i32 = lo(s.f64)
2276 // a_lo.i32 = t_lo.i32
2277 // t_hi.i32 = hi(s.f64)
2278 // a_hi.i32 = t_hi.i32
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002279 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002280 SpillVar->setLinkedTo(llvm::dyn_cast<Variable>(Src0RM));
2281 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002282 Spill->setWeight(RegWeight::Zero);
Jan Voung5cd240d2014-06-25 10:36:46 -07002283 _movq(Spill, Src0RM);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002284
2285 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2286 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2287 Variable *T_Lo = makeReg(IceType_i32);
2288 Variable *T_Hi = makeReg(IceType_i32);
2289 VariableSplit *SpillLo =
2290 VariableSplit::create(Func, Spill, VariableSplit::Low);
2291 VariableSplit *SpillHi =
2292 VariableSplit::create(Func, Spill, VariableSplit::High);
2293
2294 _mov(T_Lo, SpillLo);
2295 _mov(DestLo, T_Lo);
2296 _mov(T_Hi, SpillHi);
2297 _mov(DestHi, T_Hi);
2298 } break;
2299 case IceType_f64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002300 Src0 = legalize(Src0);
2301 assert(Src0->getType() == IceType_i64);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002302 // a.f64 = bitcast b.i64 ==>
2303 // t_lo.i32 = b_lo.i32
Jan Voung1ee34162014-06-24 13:43:30 -07002304 // FakeDef(s.f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002305 // lo(s.f64) = t_lo.i32
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002306 // t_hi.i32 = b_hi.i32
2307 // hi(s.f64) = t_hi.i32
2308 // a.f64 = s.f64
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002309 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002310 SpillVar->setLinkedTo(Dest);
2311 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002312 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002313
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002314 Variable *T_Lo = NULL, *T_Hi = NULL;
2315 VariableSplit *SpillLo =
2316 VariableSplit::create(Func, Spill, VariableSplit::Low);
2317 VariableSplit *SpillHi =
2318 VariableSplit::create(Func, Spill, VariableSplit::High);
Jan Voung1ee34162014-06-24 13:43:30 -07002319 _mov(T_Lo, loOperand(Src0));
2320 // Technically, the Spill is defined after the _store happens, but
2321 // SpillLo is considered a "use" of Spill so define Spill before it
2322 // is used.
2323 Context.insert(InstFakeDef::create(Func, Spill));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002324 _store(T_Lo, SpillLo);
Jan Voung1ee34162014-06-24 13:43:30 -07002325 _mov(T_Hi, hiOperand(Src0));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002326 _store(T_Hi, SpillHi);
Jan Voung5cd240d2014-06-25 10:36:46 -07002327 _movq(Dest, Spill);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002328 } break;
Matt Wala83b80362014-07-16 10:21:30 -07002329 case IceType_v8i1: {
2330 assert(Src0->getType() == IceType_i8);
2331 InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002332 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002333 // Arguments to functions are required to be at least 32 bits wide.
2334 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2335 Call->addArg(Src0AsI32);
2336 lowerCall(Call);
2337 } break;
2338 case IceType_v16i1: {
2339 assert(Src0->getType() == IceType_i16);
2340 InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002341 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002342 // Arguments to functions are required to be at least 32 bits wide.
2343 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2344 Call->addArg(Src0AsI32);
2345 lowerCall(Call);
2346 } break;
2347 case IceType_v8i16:
2348 case IceType_v16i8:
2349 case IceType_v4i32:
2350 case IceType_v4f32: {
2351 _movp(Dest, legalizeToVar(Src0));
2352 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002353 }
2354 break;
2355 }
Jan Voung1ee34162014-06-24 13:43:30 -07002356 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002357}
2358
// Lowers an extractelement instruction: read one scalar lane out of a
// vector operand and copy it into the scalar Dest.  Three strategies are
// used, in decreasing order of preference:
//   1. A single pextrb/pextrw/pextrd (needs SSE4.1 except for the
//      16-bit-element types, and never applies to v4f32).
//   2. pshufd (to rotate the wanted lane into lane 0) + movd/movss.
//   3. Spill the vector to a stack slot and load the lane from memory
//      (used for the 16x8-bit types).
// If the element type is i1, the extracted in-vector element is then
// truncated down to i1 before the final copy into Dest.
void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  // The in-vector element type may be wider than the nominal element type
  // (e.g. for i1 vectors); the extraction is done at the in-vector width.
  Type InVectorElementTy = getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  bool CanUsePextr =
      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Use pextrb, pextrw, or pextrd.  The lane index is encoded as an
    // i8 immediate operand.
    Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
    Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = NULL;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToVar(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination.  _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    // Zero weight keeps the slot from being register-allocated, so it
    // stays a genuine stack location.
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}
2434
// Lowers a floating-point compare.  The per-condition behavior (operand
// swap, cmpps predicate, scalar branch conditions, default result) is
// table-driven via TableFcmp, which has one entry per FCond value (16
// conditions, per the table description at the top of this file).
//
// Vector operands: emit cmpps with the table's predicate; InstFcmp::One
// and InstFcmp::Ueq need two cmpps combined with pand/por, and the
// trivial True/False conditions materialize an all-ones/all-zeros mask
// directly.  Scalar operands: emit ucomiss plus up to two conditional
// branches that overwrite a preloaded default result.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    // Some conditions are implemented as the reversed comparison of the
    // swapped operands; the table says which.
    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = NULL;

    if (Condition == InstFcmp::True) {
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

      switch (Condition) {
      default: {
        // Single cmpps with the table-supplied predicate.
        CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
        assert(Predicate != CondX86::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check both equal or unordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    // NOTE(review): presumably this drops a redundant following sext of
    // the already-all-ones/all-zeros compare result — confirm against
    // eliminateNextVectorSextInstruction's definition.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Scalar case.
  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  // C1 == Br_None means the result is constant (no compare/branch at all);
  // C2 adds a second branch to the same label for conditions that need
  // two flag tests.
  bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  // Preload the default answer; the taken branch(es) skip the overwrite.
  Constant *Default =
      Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // _mov_nonkillable: Dest stays live across the branch, so this second
    // assignment must not start a fresh live range.
    Constant *NonDefault =
        Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}
2539
2540void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2541 Operand *Src0 = legalize(Inst->getSrc(0));
2542 Operand *Src1 = legalize(Inst->getSrc(1));
2543 Variable *Dest = Inst->getDest();
2544
Matt Wala9a0168a2014-07-23 14:56:10 -07002545 if (isVectorType(Dest->getType())) {
2546 Type Ty = Src0->getType();
2547 // Promote i1 vectors to 128 bit integer vector types.
2548 if (typeElementType(Ty) == IceType_i1) {
2549 Type NewTy = IceType_NUM;
2550 switch (Ty) {
2551 default:
2552 llvm_unreachable("unexpected type");
2553 break;
2554 case IceType_v4i1:
2555 NewTy = IceType_v4i32;
2556 break;
2557 case IceType_v8i1:
2558 NewTy = IceType_v8i16;
2559 break;
2560 case IceType_v16i1:
2561 NewTy = IceType_v16i8;
2562 break;
2563 }
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002564 Variable *NewSrc0 = Func->makeVariable(NewTy);
2565 Variable *NewSrc1 = Func->makeVariable(NewTy);
Matt Wala9a0168a2014-07-23 14:56:10 -07002566 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2567 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2568 Src0 = NewSrc0;
2569 Src1 = NewSrc1;
2570 Ty = NewTy;
2571 }
2572
2573 InstIcmp::ICond Condition = Inst->getCondition();
2574
Matt Walae3777672014-07-31 09:06:17 -07002575 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2576 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2577
Matt Wala9a0168a2014-07-23 14:56:10 -07002578 // SSE2 only has signed comparison operations. Transform unsigned
2579 // inputs in a manner that allows for the use of signed comparison
2580 // operations by flipping the high order bits.
2581 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2582 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2583 Variable *T0 = makeReg(Ty);
2584 Variable *T1 = makeReg(Ty);
2585 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002586 _movp(T0, Src0RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002587 _pxor(T0, HighOrderBits);
Matt Walae3777672014-07-31 09:06:17 -07002588 _movp(T1, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002589 _pxor(T1, HighOrderBits);
Matt Walae3777672014-07-31 09:06:17 -07002590 Src0RM = T0;
2591 Src1RM = T1;
Matt Wala9a0168a2014-07-23 14:56:10 -07002592 }
2593
Matt Wala9a0168a2014-07-23 14:56:10 -07002594 Variable *T = makeReg(Ty);
2595 switch (Condition) {
2596 default:
2597 llvm_unreachable("unexpected condition");
2598 break;
2599 case InstIcmp::Eq: {
Matt Walae3777672014-07-31 09:06:17 -07002600 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002601 _pcmpeq(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002602 } break;
2603 case InstIcmp::Ne: {
Matt Walae3777672014-07-31 09:06:17 -07002604 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002605 _pcmpeq(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002606 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2607 _pxor(T, MinusOne);
2608 } break;
2609 case InstIcmp::Ugt:
2610 case InstIcmp::Sgt: {
Matt Walae3777672014-07-31 09:06:17 -07002611 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002612 _pcmpgt(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002613 } break;
2614 case InstIcmp::Uge:
2615 case InstIcmp::Sge: {
Matt Walae3777672014-07-31 09:06:17 -07002616 // !(Src1RM > Src0RM)
2617 _movp(T, Src1RM);
Matt Walad4799f42014-08-14 14:24:12 -07002618 _pcmpgt(T, Src0RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002619 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2620 _pxor(T, MinusOne);
2621 } break;
2622 case InstIcmp::Ult:
2623 case InstIcmp::Slt: {
Matt Walae3777672014-07-31 09:06:17 -07002624 _movp(T, Src1RM);
Matt Walad4799f42014-08-14 14:24:12 -07002625 _pcmpgt(T, Src0RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002626 } break;
2627 case InstIcmp::Ule:
2628 case InstIcmp::Sle: {
Matt Walae3777672014-07-31 09:06:17 -07002629 // !(Src0RM > Src1RM)
2630 _movp(T, Src0RM);
Matt Walad4799f42014-08-14 14:24:12 -07002631 _pcmpgt(T, Src1RM);
Matt Wala9a0168a2014-07-23 14:56:10 -07002632 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2633 _pxor(T, MinusOne);
2634 } break;
2635 }
Matt Wala9a0168a2014-07-23 14:56:10 -07002636
2637 _movp(Dest, T);
Matt Walace0ca8f2014-07-24 12:34:20 -07002638 eliminateNextVectorSextInstruction(Dest);
Matt Wala9a0168a2014-07-23 14:56:10 -07002639 return;
2640 }
2641
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002642 // If Src1 is an immediate, or known to be a physical register, we can
2643 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2644 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2645 // the physical register, but unfortunately we have to commit to one or
2646 // the other before register allocation.)
2647 bool IsSrc1ImmOrReg = false;
2648 if (llvm::isa<Constant>(Src1)) {
2649 IsSrc1ImmOrReg = true;
2650 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
2651 if (Var->hasReg())
2652 IsSrc1ImmOrReg = true;
2653 }
2654
2655 // Try to fuse a compare immediately followed by a conditional branch. This
2656 // is possible when the compare dest and the branch source operands are the
2657 // same, and are their only uses. TODO: implement this optimization for i64.
2658 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
2659 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
2660 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07002661 NextBr->setDeleted();
Jim Stichnothad403532014-09-25 12:44:17 -07002662 Operand *Src0RM =
2663 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002664 _cmp(Src0RM, Src1);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002665 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2666 NextBr->getTargetFalse());
2667 // Skip over the following branch instruction.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07002668 Context.advanceNext();
2669 return;
2670 }
2671 }
2672
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002673 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
Matt Wala43ff7eb2014-06-18 10:30:07 -07002674 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbc004632014-09-16 15:09:10 -07002675 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002676 if (Src0->getType() == IceType_i64) {
2677 InstIcmp::ICond Condition = Inst->getCondition();
2678 size_t Index = static_cast<size_t>(Condition);
2679 assert(Index < TableIcmp64Size);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002680 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2681 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002682 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2683 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2684 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2685 InstX8632Label *Label = InstX8632Label::create(Func, this);
2686 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002687 _cmp(Src0LoRM, Src1LoRI);
Jan Voungbd385e42014-09-18 18:18:10 -07002688 _br(CondX86::Br_ne, Label);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002689 _cmp(Src0HiRM, Src1HiRI);
Jan Voungbd385e42014-09-18 18:18:10 -07002690 _br(CondX86::Br_ne, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07002691 _mov_nonkillable(Dest, (Condition == InstIcmp::Eq ? One : Zero));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002692 Context.insert(Label);
2693 } else {
2694 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
2695 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
2696 _mov(Dest, One);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002697 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002698 _br(TableIcmp64[Index].C1, LabelTrue);
2699 _br(TableIcmp64[Index].C2, LabelFalse);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002700 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002701 _br(TableIcmp64[Index].C3, LabelTrue);
2702 Context.insert(LabelFalse);
Jim Stichnoth47752552014-10-13 17:15:08 -07002703 _mov_nonkillable(Dest, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002704 Context.insert(LabelTrue);
2705 }
2706 return;
2707 }
2708
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002709 // cmp b, c
Jim Stichnothad403532014-09-25 12:44:17 -07002710 Operand *Src0RM =
2711 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002712 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07002713 _cmp(Src0RM, Src1);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002714 _mov(Dest, One);
2715 _br(getIcmp32Mapping(Inst->getCondition()), Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07002716 _mov_nonkillable(Dest, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002717 Context.insert(Label);
2718}
2719
// Lowers an insertelement instruction: replace element Index of the source
// vector with a scalar, writing the result to Inst->getDest().  The strategy
// depends on the element width and the available instruction set:
//  - 16-bit elements, or any width with SSE4.1: one insertps/pinsr*.
//  - 32-bit elements without SSE4.1: movss for index 0, otherwise a pair of
//    shufps shuffles (see the mask derivation comment below).
//  - 8-bit elements without SSE4.1: spill the vector to a stack slot and
//    store the element through memory.
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.  i1 values occupy a full in-vector element slot, so
    // zero-extend before insertion.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    // Note: for insertps the destination lane is encoded in bits 4-5 of the
    // immediate, hence the Index << 4 below.
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = NULL;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToVar(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      // Inserting into lane 0 needs no shuffling: movss merges the low
      // scalar of ElementR into T, leaving T's upper lanes intact.
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // Element[0] is being inserted into SourceVectOperand. Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    // Mask1[i-1]/Mask2[i-1] are the shufps immediates implementing the
    // first/second shuffle for insertion into index i.
    const unsigned char Mask1[3] = { 0, 192, 128 };
    const unsigned char Mask2[3] = { 227, 196, 52 };

    Constant *Mask1Constant =
        Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]);
    Constant *Mask2Constant =
        Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    // Zero weight keeps the slot out of the register allocator so it
    // stays on the stack.
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToVar(ElementToInsertNotLegalized), Loc);

    // Reload the modified vector from the stack slot.
    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}
2844
// Lowers an intrinsic call by dispatching on the intrinsic ID.  Atomic
// intrinsics validate their memory-order arguments first (reporting via
// Func->setError on failure); libc-style intrinsics (memcpy, memset,
// longjmp, ...) are lowered to helper calls; the rest expand to short
// inline instruction sequences.
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    // Args: 0 = ptr, 1 = expected, 2 = desired, 3 = success memory order,
    // 4 = failure memory order.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
      return;
    }
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {
      Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = Instr->getArg(0);
    Operand *Expected = Instr->getArg(1);
    Operand *Desired = Instr->getArg(2);
    // First try to fuse the cmpxchg with a following compare+branch on the
    // result; fall back to the plain lowering.
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstX8632Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant
    // byte size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which
        // can make 16-byte operations lock free (when used with the LOCK
        // prefix). However, that's not supported in 32-bit mode, so just
        // return 0 even for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(IceType_i32, 1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal loads are atomic.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (Dest->getType() == IceType_i64) {
      // Follow what GCC does and use a movq instead of what lowerLoad()
      // normally does (split the load into two).
      // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
      // can't happen anyway, since this is x86-32 and integer arithmetic only
      // happens on 32-bit quantities.
      Variable *T = makeReg(IceType_f64);
      OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
      _movq(T, Addr);
      // Then cast the bits back out of the XMM register to the i64 Dest.
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
      lowerCast(Cast);
      // Make sure that the atomic load isn't elided when unused.
      Context.insert(InstFakeUse::create(Func, Dest->getLo()));
      Context.insert(InstFakeUse::create(Func, Dest->getHi()));
      return;
    }
    InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
    // insert the FakeUse on the last-inserted instruction's dest.
    Context.insert(
        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
    return;
  }
  case Intrinsics::AtomicRMW:
    // Args: 0 = operation, 1 = ptr, 2 = value, 3 = memory order.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    lowerAtomicRMW(Instr->getDest(),
                   static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
                       Instr->getArg(0))->getValue()),
                   Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal stores are atomic.
    // Add a fence after the store to make it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does
      // (split the store into two), following what GCC does.
      // Cast the bits from the i64 into an XMM register first.
      Variable *T = makeReg(IceType_f64);
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    InstStore *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the
    // argument must be a register. Use rotate left for 16-bit bswap.
    if (Val->getType() == IceType_i64) {
      // Swap bytes within each half, then swap the halves.
      Variable *T_Lo = legalizeToVar(loOperand(Val));
      Variable *T_Hi = legalizeToVar(hiOperand(Val));
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if (Val->getType() == IceType_i32) {
      Variable *T = legalizeToVar(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
      Val = legalize(Val);
      // rol by 8 swaps the two bytes of a 16-bit value.
      Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8);
      Variable *T = NULL;
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call =
        makeHelperCall(isInt32Asserting32Or64(Val->getType()) ? "__popcountsi2"
                                                              : "__popcountdi2",
                       Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zero'ing instruction is dead and gets optimized out.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = NULL;
    // For ctlz on i64, lowerCountZeros examines the high word first
    // (SecondVal), since leading zeros are counted from the top.
    if (Val->getType() == IceType_i64) {
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = NULL;
    // Mirror image of Ctlz: trailing zeros start in the low word, so the
    // hi/lo roles are swapped relative to the Ctlz case above.
    if (Val->getType() == IceType_i64) {
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall("longjmp", NULL, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall("memcpy", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall("memmove", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because the PNaCl ABI requires arguments to be at least 32 bits
    // wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall("memset", NULL, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().UseSandboxing) {
      // Under sandboxing, the thread pointer lives at %gs:0.
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Operand *Src = OperandX8632Mem::create(
          Func, IceType_i32, NULL, Zero, NULL, 0, OperandX8632Mem::SegReg_GS);
      Variable *Dest = Instr->getDest();
      Variable *T = NULL;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      // Without sandboxing, defer to the runtime helper.
      InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // Return the current stack pointer.
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);
    return;
  }
  case Intrinsics::Stackrestore: {
    // Restore esp; use the non-killing mov so that esp's liveness isn't
    // ended by the redefinition.
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    _mov_nonkillable(esp, Instr->getArg(0));
    return;
  }
  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}
3159
Jan Vounga3a01a22014-07-14 10:32:41 -07003160void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
3161 Operand *Expected, Operand *Desired) {
3162 if (Expected->getType() == IceType_i64) {
3163 // Reserve the pre-colored registers first, before adding any more
3164 // infinite-weight variables from FormMemoryOperand's legalization.
Jan Voungbd385e42014-09-18 18:18:10 -07003165 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3166 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3167 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3168 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
Jan Vounga3a01a22014-07-14 10:32:41 -07003169 _mov(T_eax, loOperand(Expected));
3170 _mov(T_edx, hiOperand(Expected));
3171 _mov(T_ebx, loOperand(Desired));
3172 _mov(T_ecx, hiOperand(Desired));
3173 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3174 const bool Locked = true;
3175 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3176 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3177 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3178 _mov(DestLo, T_eax);
3179 _mov(DestHi, T_edx);
3180 return;
3181 }
Jan Voungbd385e42014-09-18 18:18:10 -07003182 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
Jan Vounga3a01a22014-07-14 10:32:41 -07003183 _mov(T_eax, Expected);
3184 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3185 Variable *DesiredReg = legalizeToVar(Desired);
3186 const bool Locked = true;
3187 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3188 _mov(DestPrev, T_eax);
3189}
3190
// Attempts to fuse an atomic cmpxchg with an immediately-following
// "icmp eq Dest, Expected; br" pair, so the branch can consume the flags
// produced by cmpxchg directly instead of re-comparing.  Returns true and
// lowers the whole pattern (deleting the old icmp/br) on success; returns
// false, emitting nothing, if the pattern doesn't match.  Skipped at -Om1.
bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
                                           Operand *Expected,
                                           Operand *Desired) {
  if (Ctx->getOptLevel() == Opt_m1)
    return false;
  // Peek ahead a few instructions and see how Dest is used.
  // It's very common to have:
  //
  // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
  // [%y_phi = ...] // list of phi stores
  // %p = icmp eq i32 %x, %expected
  // br i1 %p, label %l1, label %l2
  //
  // which we can optimize into:
  //
  // %x = <cmpxchg code>
  // [%y_phi = ...] // list of phi stores
  // br eq, %l1, %l2
  InstList::iterator I = Context.getCur();
  // I is currently the InstIntrinsicCall. Peek past that.
  // This assumes that the atomic cmpxchg has not been lowered yet,
  // so that the instructions seen in the scan from "Cur" is simple.
  // NOTE(review): the scan relies on getNextInst(I) advancing I each call;
  // confirm against LoweringContext's contract.
  assert(llvm::isa<InstIntrinsicCall>(*I));
  Inst *NextInst = Context.getNextInst(I);
  if (!NextInst)
    return false;
  // There might be phi assignments right before the compare+branch, since this
  // could be a backward branch for a loop. This placement of assignments is
  // determined by placePhiStores().
  std::vector<InstAssign *> PhiAssigns;
  while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
    // A phi that overwrites Dest would clobber the cmpxchg result before
    // the compare, so the fusion is invalid in that case.
    if (PhiAssign->getDest() == Dest)
      return false;
    PhiAssigns.push_back(PhiAssign);
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
  }
  if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
    // The compare must be "eq" between Dest and Expected (either order).
    if (!(NextCmp->getCondition() == InstIcmp::Eq &&
          ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
           (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
      return false;
    }
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
    if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
      // The branch must be conditional on the icmp result, and that result
      // must have no other uses, or deleting the icmp would be unsound.
      if (!NextBr->isUnconditional() &&
          NextCmp->getDest() == NextBr->getCondition() &&
          NextBr->isLastUse(NextCmp->getDest())) {
        lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
        for (size_t i = 0; i < PhiAssigns.size(); ++i) {
          // Lower the phi assignments now, before the branch (same placement
          // as before).
          InstAssign *PhiAssign = PhiAssigns[i];
          PhiAssign->setDeleted();
          lowerAssign(PhiAssign);
          Context.advanceNext();
        }
        // Branch directly on ZF as set by (lock) cmpxchg: ZF=1 iff the
        // expected value matched.
        _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
        // Skip over the old compare and branch, by deleting them.
        NextCmp->setDeleted();
        NextBr->setDeleted();
        Context.advanceNext();
        Context.advanceNext();
        return true;
      }
    }
  }
  return false;
}
3263
// Lowers an atomic read-modify-write intrinsic.  Dest receives the value
// that was in memory before the operation.  Non-64-bit add/sub/exchange
// lower directly to lock xadd / xchg (xchg locks implicitly); all other
// cases -- or/and/xor, and every 64-bit form -- fall through to a
// cmpxchg loop built by expandAtomicRMWAsCmpxchg().
void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                 Operand *Ptr, Operand *Val) {
  bool NeedsCmpxchg = false;
  // Op_Lo/Op_Hi are the emitter member functions applied to the low and
  // high 32-bit halves inside the cmpxchg loop (both NULL for exchange).
  LowerBinOp Op_Lo = NULL;
  LowerBinOp Op_Hi = NULL;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      NeedsCmpxchg = true;
      Op_Lo = &TargetX8632::_add;
      Op_Hi = &TargetX8632::_adc;
      break;
    }
    // 32-bit (or narrower) add: lock xadd leaves the old value in T.
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = NULL;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      Op_Lo = &TargetX8632::_sub;
      Op_Hi = &TargetX8632::_sbb;
      break;
    }
    // Subtraction via lock xadd of the negated value.
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = NULL;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_or;
    Op_Hi = &TargetX8632::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_and;
    Op_Hi = &TargetX8632::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_xor;
    Op_Hi = &TargetX8632::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = NULL;
      Op_Hi = NULL;
      break;
    }
    // 32-bit (or narrower) exchange: xchg with a memory operand is
    // implicitly locked.
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
    Variable *T = NULL;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
3347
// Expands an atomic RMW that has no single-instruction lowering into a
// lock cmpxchg / cmpxchg8b retry loop.  Op_Lo/Op_Hi compute the desired
// new value from the current value; if both are NULL the operation is a
// plain exchange (the new value is just Val).
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_hi>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are NULL, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    // cmpxchg8b's operands are architecturally fixed: compare value in
    // edx:eax, replacement value in ecx:ebx.
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
    if (!IsXchg8b) {
      // Recompute the desired value inside the loop, since edx:eax are
      // reloaded with the observed value on each failed cmpxchg8b.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(CondX86::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base is also reused in the loop.
    Context.insert(InstFakeUse::create(Func, Addr->getBase()));
    // On success, edx:eax hold the original 64-bit memory value.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // Non-64-bit path: cmpxchg compares against eax and reloads eax on
  // failure, so only the desired-value computation lives in the loop.
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == NULL, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(CondX86::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base is also reused in the loop.
  Context.insert(InstFakeUse::create(Func, Addr->getBase()));
  _mov(Dest, T_eax);
}
3448
// Lowers count {trailing, leading} zeros intrinsic.
//
// We could do constant folding here, but that should have
// been done by the front-end/middle-end optimizations.
//
// Cttz selects bsf (count from the low end); Ctlz selects bsr plus a
// bit-position-to-zero-count conversion.  Ty is i32 or i64; for i64 the
// result is computed 32 bits at a time from FirstVal (the half scanned
// first) and SecondVal (the other half).
void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
                                  Operand *FirstVal, Operand *SecondVal) {
  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
  // Then the instructions will handle the Val == 0 case much more simply
  // and won't require conversion from bit position to number of zeros.
  //
  // Otherwise:
  //   bsr IF_NOT_ZERO, Val
  //   mov T_DEST, 63
  //   cmovne T_DEST, IF_NOT_ZERO
  //   xor T_DEST, 31
  //   mov DEST, T_DEST
  //
  // NOTE: T_DEST must be a register because cmov requires its dest to be a
  // register. Also, bsf and bsr require their dest to be a register.
  //
  // The xor DEST, 31 converts a bit position to # of leading zeroes.
  // E.g., for 000... 00001100, bsr will say that the most significant bit
  // set is at position 3, while the number of leading zeros is 28. Xor is
  // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
  //
  // Similar for 64-bit, but start w/ speculating that the upper 32 bits
  // are all zero, and compute the result for that case (checking the lower
  // 32 bits). Then actually compute the result for the upper bits and
  // cmov in the result from the lower computation if the earlier speculation
  // was correct.
  //
  // Cttz, is similar, but uses bsf instead, and doesn't require the xor
  // bit position conversion, and the speculation is reversed.
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
  if (Cttz) {
    _bsf(T, FirstValRM);
  } else {
    _bsr(T, FirstValRM);
  }
  Variable *T_Dest = makeReg(IceType_i32);
  Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);
  Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);
  // Preload the all-zeros answer; cmovne below overwrites it when the
  // bit scan found a set bit (ZF clear).
  if (Cttz) {
    _mov(T_Dest, ThirtyTwo);
  } else {
    Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);
    _mov(T_Dest, SixtyThree);
  }
  _cmov(T_Dest, T, CondX86::Br_ne);
  if (!Cttz) {
    // Convert the bsr bit position into a leading-zero count (see above).
    _xor(T_Dest, ThirtyOne);
  }
  if (Ty == IceType_i32) {
    _mov(Dest, T_Dest);
    return;
  }
  // 64-bit: T_Dest currently holds the speculative answer assuming the
  // SecondVal half contributes all 32 of its zeros.
  _add(T_Dest, ThirtyTwo);
  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  // Will be using "test" on this, so we need a registerized variable.
  Variable *SecondVar = legalizeToVar(SecondVal);
  Variable *T_Dest2 = makeReg(IceType_i32);
  // Compute the answer from SecondVal's half alone...
  if (Cttz) {
    _bsf(T_Dest2, SecondVar);
  } else {
    _bsr(T_Dest2, SecondVar);
    _xor(T_Dest2, ThirtyOne);
  }
  // ...then keep the speculative (T_Dest) answer only if SecondVal == 0.
  _test(SecondVar, SecondVar);
  _cmov(T_Dest2, T_Dest, CondX86::Br_e);
  _mov(DestLo, T_Dest2);
  // The result fits in 32 bits, so the high half is always zero.
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
3524
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003525namespace {
3526
3527bool isAdd(const Inst *Inst) {
3528 if (const InstArithmetic *Arith =
3529 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3530 return (Arith->getOp() == InstArithmetic::Add);
3531 }
3532 return false;
3533}
3534
Jim Stichnoth89d79562014-08-27 13:50:03 -07003535void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3536 const Variable *Index, uint16_t Shift, int32_t Offset,
3537 const Inst *Reason) {
3538 if (!Func->getContext()->isVerbose(IceV_AddrOpt))
3539 return;
3540 Ostream &Str = Func->getContext()->getStrDump();
3541 Str << "Instruction: ";
3542 Reason->dumpDecorated(Func);
3543 Str << " results in Base=";
3544 if (Base)
3545 Base->dump(Func);
3546 else
3547 Str << "<null>";
3548 Str << ", Index=";
3549 if (Index)
3550 Index->dump(Func);
3551 else
3552 Str << "<null>";
3553 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3554}
3555
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003556bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
3557 const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003558 // Var originates from Var=SrcVar ==>
3559 // set Var:=SrcVar
3560 if (Var == NULL)
3561 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003562 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3563 assert(!VMetadata->isMultiDef(Var));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003564 if (llvm::isa<InstAssign>(VarAssign)) {
3565 Operand *SrcOp = VarAssign->getSrc(0);
3566 assert(SrcOp);
3567 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003568 if (!VMetadata->isMultiDef(SrcVar) &&
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003569 // TODO: ensure SrcVar stays single-BB
3570 true) {
3571 Var = SrcVar;
3572 Reason = VarAssign;
3573 return true;
3574 }
3575 }
3576 }
3577 }
3578 return false;
3579}
3580
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003581bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
3582 Variable *&Index, uint16_t &Shift,
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003583 const Inst *&Reason) {
3584 // Index==NULL && Base is Base=Var1+Var2 ==>
3585 // set Base=Var1, Index=Var2, Shift=0
3586 if (Base == NULL)
3587 return false;
3588 if (Index != NULL)
3589 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003590 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003591 if (BaseInst == NULL)
3592 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003593 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003594 if (BaseInst->getSrcSize() < 2)
3595 return false;
3596 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003597 if (VMetadata->isMultiDef(Var1))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003598 return false;
3599 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003600 if (VMetadata->isMultiDef(Var2))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003601 return false;
3602 if (isAdd(BaseInst) &&
3603 // TODO: ensure Var1 and Var2 stay single-BB
3604 true) {
3605 Base = Var1;
3606 Index = Var2;
3607 Shift = 0; // should already have been 0
3608 Reason = BaseInst;
3609 return true;
3610 }
3611 }
3612 }
3613 return false;
3614}
3615
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003616bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
3617 uint16_t &Shift, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003618 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3619 // Index=Var, Shift+=log2(Const)
3620 if (Index == NULL)
3621 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003622 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003623 if (IndexInst == NULL)
3624 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003625 assert(!VMetadata->isMultiDef(Index));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003626 if (IndexInst->getSrcSize() < 2)
3627 return false;
3628 if (const InstArithmetic *ArithInst =
3629 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3630 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jan Voungbc004632014-09-16 15:09:10 -07003631 if (ConstantInteger32 *Const =
3632 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003633 if (ArithInst->getOp() == InstArithmetic::Mul &&
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003634 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003635 uint64_t Mult = Const->getValue();
3636 uint32_t LogMult;
3637 switch (Mult) {
3638 case 1:
3639 LogMult = 0;
3640 break;
3641 case 2:
3642 LogMult = 1;
3643 break;
3644 case 4:
3645 LogMult = 2;
3646 break;
3647 case 8:
3648 LogMult = 3;
3649 break;
3650 default:
3651 return false;
3652 }
3653 if (Shift + LogMult <= 3) {
3654 Index = Var;
3655 Shift += LogMult;
3656 Reason = IndexInst;
3657 return true;
3658 }
3659 }
3660 }
3661 }
3662 }
3663 return false;
3664}
3665
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003666bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
3667 int32_t &Offset, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003668 // Base is Base=Var+Const || Base is Base=Const+Var ==>
3669 // set Base=Var, Offset+=Const
3670 // Base is Base=Var-Const ==>
3671 // set Base=Var, Offset-=Const
3672 if (Base == NULL)
3673 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003674 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003675 if (BaseInst == NULL)
3676 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003677 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003678 if (const InstArithmetic *ArithInst =
3679 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3680 if (ArithInst->getOp() != InstArithmetic::Add &&
3681 ArithInst->getOp() != InstArithmetic::Sub)
3682 return false;
3683 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
3684 Variable *Var = NULL;
Jan Voungbc004632014-09-16 15:09:10 -07003685 ConstantInteger32 *Const = NULL;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003686 if (Variable *VariableOperand =
3687 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3688 Var = VariableOperand;
Jan Voungbc004632014-09-16 15:09:10 -07003689 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003690 } else if (IsAdd) {
Jan Voungbc004632014-09-16 15:09:10 -07003691 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003692 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3693 }
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003694 if (Var == NULL || Const == NULL || VMetadata->isMultiDef(Var))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003695 return false;
Jan Voungbc004632014-09-16 15:09:10 -07003696 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
Jan Voung8acded02014-09-22 18:02:25 -07003697 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
Jan Voungbc004632014-09-16 15:09:10 -07003698 return false;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003699 Base = Var;
Jan Voungbc004632014-09-16 15:09:10 -07003700 Offset += MoreOffset;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003701 Reason = BaseInst;
3702 return true;
3703 }
3704 return false;
3705}
3706
// Iteratively pattern-matches the defining instructions of Base/Index to
// grow the richest x86 addressing mode (Base + Index<<Shift + Offset)
// reachable from Instr's address operand.  Runs the match* helpers to a
// fixed point; each successful match rewrites the in/out parameters.
void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
                       Variable *&Index, uint16_t &Shift, int32_t &Offset) {
  // Clear the current-node context so dump routines below don't assert.
  Func->resetCurrentNode();
  if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nStarting computeAddressOpt for instruction:\n ";
    Instr->dumpDecorated(Func);
  }
  // NOTE(review): this cast predates matchOffsetBase(), which now does
  // read/write Offset below, so the TODO looks stale.
  (void)Offset; // TODO: pattern-match for non-zero offsets.
  if (Base == NULL)
    return;
  // If the Base has more than one use or is live across multiple
  // blocks, then don't go further. Alternatively (?), never consider
  // a transformation that would change a variable that is currently
  // *not* live across basic block boundaries into one that *is*.
  if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
    return;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  bool Continue = true;
  while (Continue) {
    const Inst *Reason = NULL;
    // Try each pattern in priority order; the first that fires restarts
    // the loop so later patterns can build on its rewrite.
    if (matchTransitiveAssign(VMetadata, Base, Reason) ||
        matchTransitiveAssign(VMetadata, Index, Reason) ||
        matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
        matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
        matchOffsetBase(VMetadata, Base, Offset, Reason)) {
      dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
    } else {
      Continue = false;
    }

    // Patterns not yet implemented:
    //
    // Index is Index=Var<<Const && Const+Shift<=3 ==>
    //   Index=Var, Shift+=Const

    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)

    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
    //   swap(Index,Base)
    // Similar for Base=Const*Var and Base=Var<<Const

    // Index is Index=Var+Const ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Const+Var ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Var-Const ==>
    //   set Index=Var, Offset-=(Const<<Shift)

    // TODO: consider overflow issues with respect to Offset.
    // TODO: handle symbolic constants.
  }
}
3762
3763} // anonymous namespace
3764
// Lowers a load by converting its address into an OperandX8632Mem and
// then either fusing it into an immediately following arithmetic
// instruction or falling back to an assignment.
void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand. Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);

  // Fuse this load with a subsequent Arithmetic instruction in the
  // following situations:
  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
  //
  // TODO: Clean up and test thoroughly.
  // (E.g., if there is an mfence-all make sure the load ends up on the
  // same side of the fence).
  //
  // TODO: Why limit to Arithmetic instructions? This could probably be
  // applied to most any instruction type. Look at all source operands
  // in the following instruction, and if there is one instance of the
  // load instruction's dest variable, and that instruction ends that
  // variable's live range, then make the substitution. Deal with
  // commutativity optimization in the arithmetic instruction lowering.
  InstArithmetic *NewArith = NULL;
  if (InstArithmetic *Arith =
          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
    Variable *DestLoad = Inst->getDest();
    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
    // Case 1: the loaded value is the second (right-hand) operand and is
    // dead afterward; substitute the memory operand in place.
    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
        DestLoad != Src0Arith) {
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(0), Src0);
    // Case 2: the loaded value is the first operand of a commutative op;
    // swap operands so the memory operand ends up on the right.
    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
                                        Arith->getSrc(1), Src0);
    }
    if (NewArith) {
      // Replace the original arithmetic instruction with the fused one
      // and lower it immediately, consuming both source instructions.
      Arith->setDeleted();
      Context.advanceNext();
      lowerArithmetic(NewArith);
      return;
    }
  }

  // No fusion opportunity: lower as a plain assignment from memory.
  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}
3815
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003816void TargetX8632::doAddressOptLoad() {
3817 Inst *Inst = *Context.getCur();
3818 Variable *Dest = Inst->getDest();
3819 Operand *Addr = Inst->getSrc(0);
3820 Variable *Index = NULL;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003821 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003822 int32_t Offset = 0; // TODO: make Constant
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003823 // Vanilla ICE load instructions should not use the segment registers,
3824 // and computeAddressOpt only works at the level of Variables and Constants,
3825 // not other OperandX8632Mem, so there should be no mention of segment
3826 // registers there either.
3827 const OperandX8632Mem::SegmentRegisters SegmentReg =
3828 OperandX8632Mem::DefaultSegment;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003829 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jim Stichnoth89d79562014-08-27 13:50:03 -07003830 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003831 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07003832 Inst->setDeleted();
Jan Voungbc004632014-09-16 15:09:10 -07003833 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003834 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003835 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003836 Context.insert(InstLoad::create(Func, Dest, Addr));
3837 }
3838}
3839
Matt Walac3302742014-08-15 16:21:56 -07003840void TargetX8632::randomlyInsertNop(float Probability) {
3841 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3842 if (RNG.getTrueWithProbability(Probability)) {
3843 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
3844 }
3845}
3846
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003847void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
3848 Func->setError("Phi found in regular instruction list");
3849}
3850
// Lowers a return instruction per the x86-32 calling convention used
// here: i64 in edx:eax, scalar float/double on the x87 stack (fld),
// vectors in xmm0, other scalars in eax.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = NULL;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
      Reg = eax;
      // Keep edx alive through the ret; only eax is passed to _ret().
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      // Float results are returned on the x87 stack.
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
    } else {
      _mov(Reg, Src0, RegX8632::Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
3877
3878void TargetX8632::lowerSelect(const InstSelect *Inst) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003879 Variable *Dest = Inst->getDest();
3880 Operand *SrcT = Inst->getTrueOperand();
3881 Operand *SrcF = Inst->getFalseOperand();
Matt Wala9cb61e22014-07-24 09:44:42 -07003882 Operand *Condition = Inst->getCondition();
3883
3884 if (isVectorType(Dest->getType())) {
Matt Wala9cb61e22014-07-24 09:44:42 -07003885 Type SrcTy = SrcT->getType();
3886 Variable *T = makeReg(SrcTy);
Matt Walae3777672014-07-31 09:06:17 -07003887 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3888 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
Matt Wala0a450512014-07-30 12:44:39 -07003889 if (InstructionSet >= SSE4_1) {
3890 // TODO(wala): If the condition operand is a constant, use blendps
3891 // or pblendw.
3892 //
3893 // Use blendvps or pblendvb to implement select.
3894 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3895 SrcTy == IceType_v4f32) {
Matt Walae3777672014-07-31 09:06:17 -07003896 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Jan Voungbd385e42014-09-18 18:18:10 -07003897 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
Matt Walae3777672014-07-31 09:06:17 -07003898 _movp(xmm0, ConditionRM);
Jan Voungbc004632014-09-16 15:09:10 -07003899 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));
Matt Walae3777672014-07-31 09:06:17 -07003900 _movp(T, SrcFRM);
Matt Walad4799f42014-08-14 14:24:12 -07003901 _blendvps(T, SrcTRM, xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003902 _movp(Dest, T);
3903 } else {
3904 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3905 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
Jim Stichnothfac55172014-10-01 13:06:21 -07003906 : IceType_v16i8;
Jan Voungbd385e42014-09-18 18:18:10 -07003907 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003908 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
Matt Walae3777672014-07-31 09:06:17 -07003909 _movp(T, SrcFRM);
Matt Walad4799f42014-08-14 14:24:12 -07003910 _pblendvb(T, SrcTRM, xmm0);
Matt Wala0a450512014-07-30 12:44:39 -07003911 _movp(Dest, T);
3912 }
3913 return;
3914 }
3915 // Lower select without SSE4.1:
3916 // a=d?b:c ==>
3917 // if elementtype(d) != i1:
3918 // d=sext(d);
3919 // a=(b&d)|(c&~d);
Matt Wala9cb61e22014-07-24 09:44:42 -07003920 Variable *T2 = makeReg(SrcTy);
3921 // Sign extend the condition operand if applicable.
3922 if (SrcTy == IceType_v4f32) {
3923 // The sext operation takes only integer arguments.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003924 Variable *T3 = Func->makeVariable(IceType_v4i32);
Matt Wala9cb61e22014-07-24 09:44:42 -07003925 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3926 _movp(T, T3);
3927 } else if (typeElementType(SrcTy) != IceType_i1) {
3928 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3929 } else {
Matt Walae3777672014-07-31 09:06:17 -07003930 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3931 _movp(T, ConditionRM);
Matt Wala9cb61e22014-07-24 09:44:42 -07003932 }
Matt Wala9cb61e22014-07-24 09:44:42 -07003933 _movp(T2, T);
Matt Walad4799f42014-08-14 14:24:12 -07003934 _pand(T, SrcTRM);
3935 _pandn(T2, SrcFRM);
Matt Wala9cb61e22014-07-24 09:44:42 -07003936 _por(T, T2);
3937 _movp(Dest, T);
Matt Wala9cb61e22014-07-24 09:44:42 -07003938
3939 return;
3940 }
3941
3942 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07003943 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Matt Wala43ff7eb2014-06-18 10:30:07 -07003944 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003945 InstX8632Label *Label = InstX8632Label::create(Func, this);
3946
3947 if (Dest->getType() == IceType_i64) {
3948 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3949 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Jim Stichnothad403532014-09-25 12:44:17 -07003950 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
3951 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07003952 _cmp(ConditionRM, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003953 _mov(DestLo, SrcLoRI);
3954 _mov(DestHi, SrcHiRI);
Jan Voungbd385e42014-09-18 18:18:10 -07003955 _br(CondX86::Br_ne, Label);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003956 Operand *SrcFLo = loOperand(SrcF);
3957 Operand *SrcFHi = hiOperand(SrcF);
Jim Stichnothad403532014-09-25 12:44:17 -07003958 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
3959 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
Jim Stichnoth47752552014-10-13 17:15:08 -07003960 _mov_nonkillable(DestLo, SrcLoRI);
3961 _mov_nonkillable(DestHi, SrcHiRI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003962 } else {
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07003963 _cmp(ConditionRM, Zero);
Jim Stichnothad403532014-09-25 12:44:17 -07003964 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003965 _mov(Dest, SrcT);
Jan Voungbd385e42014-09-18 18:18:10 -07003966 _br(CondX86::Br_ne, Label);
Jim Stichnothad403532014-09-25 12:44:17 -07003967 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
Jim Stichnoth47752552014-10-13 17:15:08 -07003968 _mov_nonkillable(Dest, SrcF);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003969 }
3970
3971 Context.insert(Label);
3972}
3973
3974void TargetX8632::lowerStore(const InstStore *Inst) {
3975 Operand *Value = Inst->getData();
3976 Operand *Addr = Inst->getAddr();
Jan Voung5cd240d2014-06-25 10:36:46 -07003977 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
Matt Wala105b7042014-08-11 19:56:19 -07003978 Type Ty = NewAddr->getType();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003979
Matt Wala105b7042014-08-11 19:56:19 -07003980 if (Ty == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003981 Value = legalize(Value);
Jim Stichnothad403532014-09-25 12:44:17 -07003982 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
3983 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003984 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
3985 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
Matt Wala105b7042014-08-11 19:56:19 -07003986 } else if (isVectorType(Ty)) {
3987 _storep(legalizeToVar(Value), NewAddr);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003988 } else {
Jim Stichnothad403532014-09-25 12:44:17 -07003989 Value = legalize(Value, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003990 _store(Value, NewAddr);
3991 }
3992}
3993
// Try to fold the address computation feeding the current store
// instruction into a single x86 addressing mode
// (base + index<<shift + offset).  On success, the original store is
// deleted and replaced by an equivalent store through the folded
// OperandX8632Mem.
void TargetX8632::doAddressOptStore() {
  InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
  Operand *Data = Inst->getData();
  Operand *Addr = Inst->getAddr();
  Variable *Index = NULL;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  // Vanilla ICE store instructions should not use the segment registers,
  // and computeAddressOpt only works at the level of Variables and Constants,
  // not other OperandX8632Mem, so there should be no mention of segment
  // registers there either.
  const OperandX8632Mem::SegmentRegisters SegmentReg =
      OperandX8632Mem::DefaultSegment;
  // computeAddressOpt fills in Base/Index/Shift/Offset (by reference) by
  // pattern-matching the instructions that produce Addr.
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  // A non-null Base different from Addr signals that a better addressing
  // mode was found.
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
    Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
                                   Shift, SegmentReg);
    Context.insert(InstStore::create(Func, Data, Addr));
  }
}
4017
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004018void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4019 // This implements the most naive possible lowering.
4020 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4021 Operand *Src0 = Inst->getComparison();
4022 SizeT NumCases = Inst->getNumCases();
4023 // OK, we'll be slightly less naive by forcing Src into a physical
4024 // register if there are 2 or more uses.
4025 if (NumCases >= 2)
4026 Src0 = legalizeToVar(Src0, true);
4027 else
Jim Stichnothad403532014-09-25 12:44:17 -07004028 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004029 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothcabfa302014-09-03 15:19:12 -07004030 // TODO(stichnot): Correct lowering for IceType_i64.
Jan Voungbc004632014-09-16 15:09:10 -07004031 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004032 _cmp(Src0, Value);
Jan Voungbd385e42014-09-18 18:18:10 -07004033 _br(CondX86::Br_e, Inst->getLabel(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004034 }
4035
4036 _br(Inst->getLabelDefault());
4037}
4038
Matt Walaafeaee42014-08-07 13:47:30 -07004039void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4040 Variable *Dest, Operand *Src0,
4041 Operand *Src1) {
4042 assert(isVectorType(Dest->getType()));
4043 Type Ty = Dest->getType();
4044 Type ElementTy = typeElementType(Ty);
4045 SizeT NumElements = typeNumElements(Ty);
4046
4047 Operand *T = Ctx->getConstantUndef(Ty);
4048 for (SizeT I = 0; I < NumElements; ++I) {
Jan Voungbc004632014-09-16 15:09:10 -07004049 Constant *Index = Ctx->getConstantInt32(IceType_i32, I);
Matt Walaafeaee42014-08-07 13:47:30 -07004050
4051 // Extract the next two inputs.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004052 Variable *Op0 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004053 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004054 Variable *Op1 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004055 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4056
4057 // Perform the arithmetic as a scalar operation.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004058 Variable *Res = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004059 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4060
4061 // Insert the result into position.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004062 Variable *DestT = Func->makeVariable(Ty);
Matt Walaafeaee42014-08-07 13:47:30 -07004063 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4064 T = DestT;
4065 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of
4066 // infinite weight temporaries.
4067 }
4068
4069 lowerAssign(InstAssign::create(Func, Dest, T));
4070}
4071
Matt Walace0ca8f2014-07-24 12:34:20 -07004072// The following pattern occurs often in lowered C and C++ code:
4073//
4074// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4075// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4076//
4077// We can eliminate the sext operation by copying the result of pcmpeqd,
4078// pcmpgtd, or cmpps (which produce sign extended results) to the result
4079// of the sext operation.
4080void
4081TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
4082 if (InstCast *NextCast =
4083 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4084 if (NextCast->getCastKind() == InstCast::Sext &&
4085 NextCast->getSrc(0) == SignExtendedResult) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004086 NextCast->setDeleted();
Matt Walace0ca8f2014-07-24 12:34:20 -07004087 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4088 // Skip over the instruction.
Matt Walace0ca8f2014-07-24 12:34:20 -07004089 Context.advanceNext();
4090 }
4091 }
4092}
4093
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004094void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
4095 const SizeT MaxSrcs = 0;
4096 Variable *Dest = NULL;
4097 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
4098 lowerCall(Call);
4099}
4100
Matt Wala9a0168a2014-07-23 14:56:10 -07004101// There is no support for loading or emitting vector constants, so the
4102// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4103// etc. are initialized with register operations.
4104//
4105// TODO(wala): Add limited support for vector constants so that
4106// complex initialization in registers is unnecessary.
4107
Matt Wala83b80362014-07-16 10:21:30 -07004108Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004109 Variable *Reg = makeReg(Ty, RegNum);
4110 // Insert a FakeDef, since otherwise the live range of Reg might
4111 // be overestimated.
4112 Context.insert(InstFakeDef::create(Func, Reg));
4113 _pxor(Reg, Reg);
4114 return Reg;
4115}
4116
Matt Wala9a0168a2014-07-23 14:56:10 -07004117Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
4118 Variable *MinusOnes = makeReg(Ty, RegNum);
4119 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
4120 Context.insert(InstFakeDef::create(Func, MinusOnes));
4121 _pcmpeq(MinusOnes, MinusOnes);
4122 return MinusOnes;
4123}
4124
Matt Wala83b80362014-07-16 10:21:30 -07004125Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004126 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07004127 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07004128 _psub(Dest, MinusOne);
4129 return Dest;
4130}
4131
Matt Wala9a0168a2014-07-23 14:56:10 -07004132Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4133 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4134 Ty == IceType_v16i8);
4135 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4136 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4137 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
Jan Voungbc004632014-09-16 15:09:10 -07004138 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift));
Matt Wala9a0168a2014-07-23 14:56:10 -07004139 return Reg;
4140 } else {
4141 // SSE has no left shift operation for vectors of 8 bit integers.
4142 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4143 Constant *ConstantMask =
Jan Voungbc004632014-09-16 15:09:10 -07004144 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
Matt Wala9a0168a2014-07-23 14:56:10 -07004145 Variable *Reg = makeReg(Ty, RegNum);
4146 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4147 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4148 return Reg;
4149 }
4150}
4151
Matt Wala49889232014-07-18 12:45:09 -07004152OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4153 Variable *Slot,
4154 uint32_t Offset) {
4155 // Ensure that Loc is a stack slot.
4156 assert(Slot->getWeight() == RegWeight::Zero);
4157 assert(Slot->getRegNum() == Variable::NoRegister);
4158 // Compute the location of Loc in memory.
4159 // TODO(wala,stichnot): lea should not be required. The address of
4160 // the stack slot is known at compile time (although not until after
4161 // addProlog()).
4162 const Type PointerType = IceType_i32;
4163 Variable *Loc = makeReg(PointerType);
4164 _lea(Loc, Slot);
Jan Voungbc004632014-09-16 15:09:10 -07004165 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset);
Matt Wala49889232014-07-18 12:45:09 -07004166 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4167}
4168
Matt Wala928f1292014-07-07 16:50:46 -07004169// Helper for legalize() to emit the right code to lower an operand to a
4170// register of the appropriate type.
4171Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4172 Type Ty = Src->getType();
4173 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07004174 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07004175 _movp(Reg, Src);
4176 } else {
4177 _mov(Reg, Src);
4178 }
4179 return Reg;
4180}
4181
// Turns From into an operand of one of the kinds permitted by Allowed
// (register / memory / immediate / relocatable), emitting copy
// instructions as needed.  If RegNum is specified, the result is forced
// into that physical register.  The three top-level cases handle memory
// operands, constants, and variables respectively.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register.  If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds.  (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = NULL;
    Variable *RegIndex = NULL;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Rebuild the memory operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From = OperandX8632Mem::create(
          Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
          Mem->getShift(), Mem->getSegmentRegister());
    }

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero.  Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      //
      // If in the future the implementation is changed to lower undef
      // values to uninitialized registers, a FakeDef will be needed:
      // Context.insert(InstFakeDef::create(Func, Reg));
      // This is in order to ensure that the live range of Reg is not
      // overestimated.  If the constant being lowered is a 64 bit value,
      // then the result should be split and the lo and hi components will
      // need to go in uninitialized registers.
      if (isVectorType(From->getType()))
        return makeVectorOfZeros(From->getType());
      From = Ctx->getConstantZero(From->getType());
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(From->getType()));
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm))
      // Immediate specifically not allowed
      NeedsReg = true;
    // TODO(stichnot): LEAHACK: remove Legal_Reloc once a proper
    // emitter is used.
    if (!(Allowed & Legal_Reloc) && llvm::isa<ConstantRelocatable>(From))
      // Relocatable specifically not allowed
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(From->getType()))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister =
        (Var->hasReg() || Var->getWeight() == RegWeight::Inf);
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
4273
4274// Provide a trivial wrapper to legalize() for this common usage.
Jim Stichnothad403532014-09-25 12:44:17 -07004275Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
4276 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004277}
4278
Jan Voung5cd240d2014-06-25 10:36:46 -07004279OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
4280 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4281 // It may be the case that address mode optimization already creates
4282 // an OperandX8632Mem, so in that case it wouldn't need another level
4283 // of transformation.
4284 if (!Mem) {
4285 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4286 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4287 assert(Base || Offset);
Matt Walae3777672014-07-31 09:06:17 -07004288 if (Offset) {
Jan Voungbc004632014-09-16 15:09:10 -07004289 assert(llvm::isa<ConstantInteger32>(Offset) ||
Matt Walae3777672014-07-31 09:06:17 -07004290 llvm::isa<ConstantRelocatable>(Offset));
4291 }
Jan Voung5cd240d2014-06-25 10:36:46 -07004292 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4293 }
4294 return llvm::cast<OperandX8632Mem>(legalize(Mem));
4295}
4296
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004297Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
Jan Voung1ee34162014-06-24 13:43:30 -07004298 // There aren't any 64-bit integer registers for x86-32.
4299 assert(Type != IceType_i64);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004300 Variable *Reg = Func->makeVariable(Type);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004301 if (RegNum == Variable::NoRegister)
4302 Reg->setWeightInfinite();
4303 else
4304 Reg->setRegNum(RegNum);
4305 return Reg;
4306}
4307
// Post-lowering fixups.  At higher opt levels this only marks two-address
// instructions (Dest==Src0) as DestNonKillable for liveness analysis.  At
// -Om1 it performs a minimal two-pass local register allocation for
// infinite-weight temporaries created during lowering.
void TargetX8632::postLower() {
  if (Ctx->getOptLevel() != Opt_m1) {
    // Find two-address non-SSA instructions where Dest==Src0, and set
    // the DestNonKillable flag to keep liveness analysis consistent.
    for (Inst *Inst : Context) {
      if (Inst->isDeleted())
        continue;
      if (Variable *Dest = Inst->getDest()) {
        // TODO(stichnot): We may need to consider all source
        // operands, not just the first one, if using 3-address
        // instructions.
        if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
          Inst->setDestNonKillable();
      }
    }
    return;
  }
  // TODO: Avoid recomputing WhiteList every instruction.
  RegSetMask RegInclude = RegSet_All;
  RegSetMask RegExclude = RegSet_StackPointer;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  // WhiteList holds the registers this pass is allowed to hand out.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
  // Make one pass to black-list pre-colored registers.  TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  llvm::DenseMap<const Variable *, const Inst *> LastUses;
  // The first pass also keeps track of which instruction is the last
  // use for each infinite-weight variable.  After the last use, the
  // variable is released to the free list.
  for (Inst *Inst : Context) {
    if (Inst->isDeleted())
      continue;
    // Don't consider a FakeKill instruction, because (currently) it
    // is only used to kill all scratch registers at a call site, and
    // we don't want to black-list all scratch registers during the
    // call lowering.  This could become a problem since it relies on
    // the lowering sequence not keeping any infinite-weight variables
    // live across a call.  TODO(stichnot): Consider replacing this
    // whole postLower() implementation with a robust local register
    // allocator, for example compute live ranges only for pre-colored
    // and infinite-weight variables and run the existing linear-scan
    // allocator.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        // Track last uses of all variables, regardless of whether
        // they are pre-colored or infinite-weight.
        LastUses[Var] = Inst;
        if (!Var->hasReg())
          continue;
        // Pre-colored: remove its register from the hand-out pool.
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  // Registers freed by last uses within one instruction are collected in
  // FreedRegisters and merged back only after the whole instruction has
  // been processed.
  llvm::SmallBitVector FreedRegisters(WhiteList.size());
  for (Inst *Inst : Context) {
    FreedRegisters.reset();
    if (Inst->isDeleted())
      continue;
    // Skip FakeKill instructions like above.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    // Iterate over all variables referenced in the instruction,
    // including the Dest variable (if any).  If the variable is
    // marked as infinite-weight, find it a register.  If this
    // instruction is the last use of the variable in the lowered
    // sequence, release the register to the free list after this
    // instruction is completely processed.  Note that the first pass
    // ignores the Dest operand, under the assumption that a
    // pre-colored Dest will appear as a source operand in some
    // subsequent instruction in the lowered sequence.
    Variable *Dest = Inst->getDest();
    SizeT NumSrcs = Inst->getSrcSize();
    if (Dest)
      ++NumSrcs;
    // Gather the sources plus Dest (appended last) into one list.
    OperandList Srcs(NumSrcs);
    for (SizeT i = 0; i < Inst->getSrcSize(); ++i)
      Srcs[i] = Inst->getSrc(i);
    if (Dest)
      Srcs[NumSrcs - 1] = Dest;
    for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) {
      Operand *Src = Srcs[SrcNum];
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        Variable *Var = Src->getVar(J);
        if (!Var->hasReg() && Var->getWeight().isInf()) {
          // Infinite-weight and still uncolored: grab the first free
          // register compatible with the variable's type.
          llvm::SmallBitVector AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
          assert(AvailableTypedRegisters.any());
          int32_t RegNum = AvailableTypedRegisters.find_first();
          Var->setRegNum(RegNum);
          AvailableRegisters[RegNum] = false;
        }
        if (Var->hasReg()) {
          int32_t RegNum = Var->getRegNum();
          assert(!AvailableRegisters[RegNum]);
          if (LastUses[Var] == Inst) {
            // Last use: schedule the register for release, but only if
            // it is one this pass manages (in the WhiteList).
            if (WhiteList[RegNum])
              FreedRegisters[RegNum] = true;
          }
        }
      }
    }
    AvailableRegisters |= FreedRegisters;
  }
}
4422
Jan Voungbc004632014-09-16 15:09:10 -07004423template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
Matt Wala928f1292014-07-07 16:50:46 -07004424 Ostream &Str = Ctx->getStrEmit();
Jan Voungbc004632014-09-16 15:09:10 -07004425 Str << (int32_t)getValue();
4426}
4427
// 64-bit integer constants are always split into lo/hi halves before
// emission on x86-32, so reaching this emitter is a bug.
template <> void ConstantInteger64::emit(GlobalContext *) const {
  llvm_unreachable("Not expecting to emit 64-bit integers");
}
4431
// Emits a 32-bit float constant as a memory reference into its constant
// pool entry, identified by type and pool entry ID.
template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  // It would be better to prefix with ".L$" instead of "L$", but
  // llvm-mc doesn't parse "dword ptr [.L$foo]".
  Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
}
4438
// Emits a 64-bit double constant as a memory reference into its constant
// pool entry (same naming scheme as ConstantFloat::emit above).
template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
  Ostream &Str = Ctx->getStrEmit();
  Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
}
4443
// Undef constants are legalized away (lowered to zero) before emission,
// so reaching this emitter is a bug.
void ConstantUndef::emit(GlobalContext *) const {
  llvm_unreachable("undef value encountered by emitter.");
}
4447
// Constructor simply forwards the global context to the base class.
TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
    : TargetGlobalInitLowering(Ctx) {}
4450
// Emits the assembly for one global variable declaration: section
// selection, linkage directives, alignment, the label, and the
// initializer bytes.  Uninitialized internal data is emitted via .comm,
// which subsumes section/alignment/size.
void TargetGlobalInitX8632::lower(const VariableDeclaration &Var) {

  Ostream &Str = Ctx->getStrEmit();

  const VariableDeclaration::InitializerListType &Initializers =
      Var.getInitializers();

  // If external and not initialized, this must be a cross test.
  // Don't generate a declaration for such cases.
  bool IsExternal = Var.isExternal() || Ctx->getFlags().DisableInternal;
  if (IsExternal && !Var.hasInitializer()) return;

  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  bool IsConstant = Var.getIsConstant();
  uint32_t Align = Var.getAlignment();
  SizeT Size = Var.getNumBytes();
  IceString MangledName = Var.mangleName(Ctx);
  // With -fdata-sections-style emission, each global gets its own
  // uniquely named section.
  IceString SectionSuffix = "";
  if (Ctx->getFlags().DataSections)
    SectionSuffix = "." + MangledName;

  Str << "\t.type\t" << MangledName << ",@object\n";

  // Choose the section: .rodata for constants, .data for nonzero
  // initializers, .bss for external zero-initialized data.
  if (IsConstant)
    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
  else if (IsExternal)
    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";
  // No .section for non-constant + zeroinitializer + internal

  if (IsExternal)
    Str << "\t.globl\t" << MangledName << "\n";
  else if (!IsConstant && !HasNonzeroInitializer)
    Str << "\t.local\t" << MangledName << "\n";
  // Internal symbols only get .local when using .comm.

  if ((IsConstant || HasNonzeroInitializer || IsExternal) && Align > 1)
    Str << "\t.align\t" << Align << "\n";
  // Alignment is part of .comm.

  if (IsConstant || HasNonzeroInitializer || IsExternal)
    Str << MangledName << ":\n";
  else
    Str << "\t.comm\t" << MangledName << "," << Size << "," << Align << "\n";

  if (HasNonzeroInitializer) {
    // Walk the initializer list, emitting raw bytes, zero runs, and
    // relocations as the appropriate assembler directives.
    for (VariableDeclaration::Initializer *Init : Initializers) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
                              ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t.long\t";
        Str << Reloc->getDeclaration()->mangleName(Ctx);
        if (VariableDeclaration::RelocOffsetType Offset = Reloc->getOffset()) {
          // INT32_MIN is special-cased through the "+" branch because
          // negating it would overflow (undefined behavior).
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      default: {
        std::string Buffer;
        llvm::raw_string_ostream StrBuf(Buffer);
        StrBuf << "Unable to lower initializer: ";
        Init->dump(StrBuf);
        llvm::report_fatal_error(StrBuf.str());
        break;
      }
      }
    }
  } else if (IsConstant || IsExternal)
    Str << "\t.zero\t" << Size << "\n";
  // Size is part of .comm.

  if (IsConstant || HasNonzeroInitializer || IsExternal)
    Str << "\t.size\t" << MangledName << ", " << Size << "\n";
  // Size is part of .comm.
}
4543
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004544} // end of namespace Ice