blob: 8c59499c73f183a73967e11ba7c56d0aaa64ff82 [file] [log] [blame]
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the TargetLoweringX8632 class, which
11// consists almost entirely of the lowering sequence for each
Jim Stichnoth70d0a052014-11-14 15:53:46 -080012// high-level instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070013//
14//===----------------------------------------------------------------------===//
15
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070016#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/MathExtras.h"
18
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070019#include "IceCfg.h"
20#include "IceCfgNode.h"
Jim Stichnothbfb03e52014-08-26 10:29:05 -070021#include "IceClFlags.h"
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070022#include "IceDefs.h"
Jan Voungec270732015-01-12 17:00:22 -080023#include "IceELFObjectWriter.h"
Karl Schimpfe3f64d02014-10-07 10:38:22 -070024#include "IceGlobalInits.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070025#include "IceInstX8632.h"
Jim Stichnoth336f6c42014-10-30 15:01:31 -070026#include "IceLiveness.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070027#include "IceOperand.h"
Jan Voungbd385e42014-09-18 18:18:10 -070028#include "IceRegistersX8632.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070029#include "IceTargetLoweringX8632.def"
30#include "IceTargetLoweringX8632.h"
Jan Voung8acded02014-09-22 18:02:25 -070031#include "IceUtils.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070032
33namespace Ice {
34
35namespace {
36
Matt Walace0ca8f2014-07-24 12:34:20 -070037// The following table summarizes the logic for lowering the fcmp
38// instruction. There is one table entry for each of the 16 conditions.
39//
40// The first four columns describe the case when the operands are
41// floating point scalar values. A comment in lowerFcmp() describes the
42// lowering template. In the most general case, there is a compare
43// followed by two conditional branches, because some fcmp conditions
44// don't map to a single x86 conditional branch. However, in many cases
45// it is possible to swap the operands in the comparison and have a
46// single conditional branch. Since it's quite tedious to validate the
47// table by hand, good execution tests are helpful.
48//
49// The last two columns describe the case when the operands are vectors
50// of floating point values. For most fcmp conditions, there is a clear
51// mapping to a single x86 cmpps instruction variant. Some fcmp
52// conditions require special code to handle and these are marked in the
53// table with a Cmpps_Invalid predicate.
// One row per InstFcmp condition, generated from FCMPX8632_TABLE; the
// dummy1 namespace below statically asserts that the row order matches
// the high-level ICEINSTFCMP_TABLE enum order.
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapScalarOperands;
  CondX86::BrCond C1, C2;
  bool SwapVectorOperands;
  CondX86::CmppsCond Predicate;
} TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred }              \
  ,
    FCMPX8632_TABLE
#undef X
};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
68
69// The following table summarizes the logic for lowering the icmp instruction
70// for i32 and narrower types. Each icmp condition has a clear mapping to an
71// x86 conditional branch instruction.
72
// One row per InstIcmp condition, generated from ICMPX8632_TABLE; only
// the 32-bit-and-narrower branch condition column is kept here.  Row
// order is validated against ICEINSTICMP_TABLE in namespace dummy2.
const struct TableIcmp32_ {
  CondX86::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C_32 }                                                            \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
83
84// The following table summarizes the logic for lowering the icmp instruction
85// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
86// conditional branches are needed. For the other conditions, three separate
87// conditional branches are needed.
// One row per InstIcmp condition, generated from ICMPX8632_TABLE,
// keeping the three branch conditions used for the i64 lowering
// sequence (see the comment above for how C1..C3 are consumed).
const struct TableIcmp64_ {
  CondX86::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 }                           \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
98
Jan Voungbd385e42014-09-18 18:18:10 -070099CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700100 size_t Index = static_cast<size_t>(Cond);
101 assert(Index < TableIcmp32Size);
102 return TableIcmp32[Index].Mapping;
103}
104
// Per-type attributes for the X86 lowering, generated from
// ICETYPEX8632_TABLE (row order validated against ICETYPE_TABLE in
// namespace dummy3).  Currently only the in-vector element type column
// is kept; it is consumed by getInVectorElementType() below.
const struct TableTypeX8632Attributes_ {
  Type InVectorElementType;
} TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  { elementty }                                                                \
  ,
    ICETYPEX8632_TABLE
#undef X
};
const size_t TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);
116
117// Return the type which the elements of the vector have in the X86
118// representation of the vector.
119Type getInVectorElementType(Type Ty) {
120 assert(isVectorType(Ty));
121 size_t Index = static_cast<size_t>(Ty);
Jim Stichnoth6e992142014-07-30 14:45:20 -0700122 (void)Index;
Matt Wala49889232014-07-18 12:45:09 -0700123 assert(Index < TableTypeX8632AttributesSize);
124 return TableTypeX8632Attributes[Ty].InVectorElementType;
125}
126
// The maximum number of arguments to pass in XMM registers
const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const uint32_t X86_CHAR_BIT = 8;
// Stack alignment in bytes
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size of the return address on the stack, in bytes
const uint32_t X86_RET_IP_SIZE_BYTES = 4;
// The base 2 logarithm of the width in bytes of the smallest stack slot
const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
// The base 2 logarithm of the width in bytes of the largest stack slot
const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
// The number of different NOP instructions (used by doNopInsertion())
const uint32_t X86_NUM_NOP_VARIANTS = 5;
Matt Wala105b7042014-08-11 19:56:19 -0700141
// Value and Alignment are in bytes.  Return Value rounded up to the
// next highest multiple of Alignment, which must be a nonzero power
// of 2.
uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
  // The original power-of-2 check also accepted Alignment == 0, which
  // would make the mask expression below silently return 0 for any
  // Value; reject zero explicitly.
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0);
  return (Value + Alignment - 1) & -Alignment;
}
149
// Value is in bytes.  Return Value adjusted to the next highest
// multiple of the stack alignment (X86_STACK_ALIGNMENT_BYTES).
uint32_t applyStackAlignment(uint32_t Value) {
  return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
Matt Wala45a06232014-07-09 16:33:22 -0700155
// Instruction set options.  Selects which x86 instruction-set level
// the lowering may use; stored into TargetX8632::InstructionSet by the
// constructor.
namespace cl = ::llvm::cl;
cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
    "mattr", cl::desc("X86 target attributes"), cl::init(TargetX8632::SSE2),
    cl::values(clEnumValN(TargetX8632::SSE2, "sse2",
                          "Enable SSE2 instructions (default)"),
               clEnumValN(TargetX8632::SSE4_1, "sse4.1",
                          "Enable SSE 4.1 instructions"),
               clEnumValEnd));
Matt Wala0a450512014-07-30 12:44:39 -0700165
// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers.  There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted.  The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in FCMPX8632_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE;
#undef X
} // end of namespace dummy1
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700206
// Validate the enum values in ICMPX8632_TABLE (same scheme as
// namespace dummy1: low-level keys become enum values, high-level and
// low-level keys become int constants, and static_asserts in both
// directions catch reordered, added, or deleted entries).
namespace dummy2 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8632_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE;
#undef X
} // end of namespace dummy2
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700239
// Validate the enum values in ICETYPEX8632_TABLE against ICETYPE_TABLE
// (same scheme as namespaces dummy1/dummy2).
namespace dummy3 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
  ICETYPEX8632_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, elts, elty, str)                                   \
  static const int _table1_##tag = tag;
ICETYPE_TABLE;
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  static const int _table2_##tag = _tmp_##tag;                                 \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPEX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, size, align, elts, elty, str)                                   \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE;
#undef X
} // end of namespace dummy3
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700271
272} // end of anonymous namespace
273
// Construct the x86-32 lowering for Func.  Builds the per-type legal
// register sets (TypeToRegisterSet) and the scratch-register set from
// the REGX8632_TABLE x-macro columns.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), InstructionSet(CLInstructionSet),
      IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
      SpillAreaSizeBytes(0), NextLabelNumber(0) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
  ScratchRegs.resize(RegX8632::Reg_NUM);
// Populate one bit per register from the table's isInt/isI8/isFP/
// scratch columns.  Note that FloatRegisters and VectorRegisters are
// both driven by the isFP column (scalar FP and vectors share xmm).
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[RegX8632::val] = isInt;                                     \
  IntegerRegistersI8[RegX8632::val] = isI8;                                    \
  FloatRegisters[RegX8632::val] = isFP;                                        \
  VectorRegisters[RegX8632::val] = isFP;                                       \
  ScratchRegs[RegX8632::val] = scratch;
  REGX8632_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
312
// Run the full -O2 translation pipeline on Func.  The pass order below
// is deliberate (e.g. liveness before compare/branch fusing, branch
// optimization after all CfgNode reordering); each phase bails out
// early if the Cfg has recorded an error.
void TargetX8632::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().PhiEdgeSplit) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering.  This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing.  If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first.  TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight().  However, for some
  // reason that slows down the rest of the translation.  Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation.  This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations.  The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().PhiEdgeSplit) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization.  This needs to be done just before code
  // emission.  In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization.  We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
407
// Run the minimal -Om1 translation pipeline on Func: Phi lowering,
// argument lowering, codegen, a restricted register allocation pass
// (infinite-weight variables only), and frame generation.  No
// optimization passes are run; each phase bails out early on error.
void TargetX8632::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
444
Jim Stichnothff9c7062014-09-18 04:50:49 -0700445bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
446 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
447 return Br->optimizeBranch(NextNode);
448 }
449 return false;
450}
451
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700452IceString TargetX8632::RegNames[] = {
Jan Voungbd385e42014-09-18 18:18:10 -0700453#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700454 frameptr, isI8, isInt, isFP) \
455 name,
Jim Stichnothdd842db2015-01-27 12:53:53 -0800456 REGX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700457#undef X
458};
459
// Return the canonical Variable representing physical register RegNum
// at type Ty (void maps to i32).  Variables are created lazily and
// cached per (type, register) in PhysicalRegisters.
Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  // Lazily size this type's cache row on first use.
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered
    // live upon function entry.
    if (RegNum == RegX8632::Reg_esp) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}
480
// Return the name of register RegNum at the width implied by Ty:
// the 8-bit name for i1/i8, the 16-bit name for i16, and the 32-bit
// name (from the class-level RegNames table) otherwise.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegX8632::Reg_NUM);
  static IceString RegNames8[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
      REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
      REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
507
// Emit Var as an assembly operand: "%reg" when it has a register,
// otherwise a stack reference "offset(%ebp)" / "offset(%esp)" (the
// offset is omitted when zero, matching AT&T syntax).
void TargetX8632::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  // An infinite-weight variable must have been given a register, so a
  // stack-only infinite-weight variable indicates a regalloc bug.
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  const Type Ty = IceType_i32;
  int32_t Offset = Var->getStackOffset();
  // Without a frame pointer, offsets are esp-relative and must track
  // intra-block stack adjustments.
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  if (Offset)
    Str << Offset;
  Str << "(%" << getRegName(getFrameOrStackReg(), Ty) << ")";
}
524
// Build the integrated-assembler Address operand for a stack-allocated
// Var: base register is the frame or stack pointer, displacement is
// the variable's stack offset (plus the current stack adjustment when
// there is no frame pointer).  Var must not have a register.
x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  return x86::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
}
535
// Rewrite register-passed arguments.  Each vector argument (up to
// X86_MAX_XMM_ARGS of them) is replaced in the argument list by a new
// variable pinned to its home xmm register, and a copy from the home
// register to the original argument variable is inserted at the top of
// the entry node.
void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
       ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register.  Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (ALLOW_DUMP)
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    // The original variable is no longer a formal argument; it becomes
    // an ordinary variable initialized by the copy below.
    Arg->setIsArg(false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}
568
Matt Walad4799f42014-08-14 14:24:12 -0700569void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
570 // Sort the variables into buckets according to the log of their width
571 // in bytes.
572 const SizeT NumBuckets =
573 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
574 VarList Buckets[NumBuckets];
575
Jim Stichnothf44f3712014-10-01 14:05:51 -0700576 for (Variable *Var : Source) {
Matt Walad4799f42014-08-14 14:24:12 -0700577 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
Jan Voung1eb3a552014-09-12 11:05:47 -0700578 SizeT LogNaturalAlignment = llvm::findFirstSet(NaturalAlignment);
Matt Walad4799f42014-08-14 14:24:12 -0700579 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
580 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
581 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
582 Buckets[BucketIndex].push_back(Var);
583 }
584
585 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
586 VarList &List = Buckets[NumBuckets - I - 1];
587 Dest.insert(Dest.end(), List.begin(), List.end());
588 }
589}
590
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  // A split i64 argument is handled as its two i32 halves; Lo must be
  // processed first so it gets the lower stack address (little-endian).
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  // Vector arguments on the stack are aligned to the full stack alignment.
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the register allocator assigned Arg a register, emit a load
  // from its in-arg stack slot into that register.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandX8632Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}
634
Jan Voung3bd9f1a2014-06-18 10:50:57 -0700635Type TargetX8632::stackSlotType() { return IceType_i32; }
636
// Computes the stack frame layout for the function and emits the
// prolog instructions (callee-save pushes, optional "push ebp; mov
// ebp, esp", and the "sub esp, N" frame allocation) at the start of
// Node. Also assigns final stack offsets to every spilled variable
// and to stack-passed arguments.
void TargetX8632::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES:  area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes:  area 3
  //  * GlobalsSize:            area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize:    area 6
  //  * SpillAreaSizeBytes:     areas 3 - 7

  // Make a final pass over the Cfg to determine which variables need
  // stack slots.
  llvm::BitVector IsVarReferenced(Func->getNumVariables());
  // NOTE(review): this loop variable shadows the function's Node
  // parameter; the parameter is not needed again until Context.init()
  // below, so behavior is unaffected, but a rename would be clearer.
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Inst : Node->getInsts()) {
      if (Inst.isDeleted())
        continue;
      // A variable is "referenced" if any non-deleted instruction
      // defines it or reads it as a source.
      if (const Variable *Var = Inst.getDest())
        IsVarReferenced[Var->getIndex()] = true;
      for (SizeT I = 0; I < Inst.getSrcSize(); ++I) {
        Operand *Src = Inst.getSrc(I);
        SizeT NumVars = Src->getNumVars();
        for (SizeT J = 0; J < NumVars; ++J) {
          const Variable *Var = Src->getVar(J);
          IsVarReferenced[Var->getIndex()] = true;
        }
      }
    }
  }

  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames. If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks. E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function. In that case, basic blocks may be revisited, and variables
  // local to those basic blocks are actually live until after the
  // called function returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();
  size_t InArgsSizeBytes = 0;
  size_t PreservedRegsSizeBytes = 0;
  SpillAreaSizeBytes = 0;
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  size_t GlobalsSize = 0;
  std::vector<size_t> LocalsSize(Func->getNumNodes());

  // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
  // SpillAreaSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  VarList SpilledVariables, SortedSpilledVariables, VariablesLinkedToSpillSlots;

  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a
    // register) or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (!IsVarReferenced[Var->getIndex()])
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
      assert(Var->getWeight() == RegWeight::Zero);
      if (!SpillVar->getLinkedTo()->hasReg()) {
        VariablesLinkedToSpillSlots.push_back(Var);
        continue;
      }
    }
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortByAlignment(SortedSpilledVariables, SpilledVariables);
  // First pass over the sorted spills: accumulate the sizes of the
  // globals area and the (per-node, shared) locals area, and pick up
  // the required alignments. The first (largest) entries establish
  // SpillAreaAlignmentBytes / LocalsSlotsAlignmentBytes.
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (!SpillAreaAlignmentBytes)
      SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
          SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!LocalsSlotsAlignmentBytes)
          LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      SpillAreaSizeBytes += Increment;
    }
  }
  // At this point SpillAreaSizeBytes is the locals-only size (max over
  // nodes when coalescing); remember it before folding in the globals.
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;

  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill
  // areas.
  uint32_t SpillAreaPaddingBytes = 0;
  if (SpillAreaAlignmentBytes) {
    assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
    uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t SpillAreaStart =
        applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
    SpillAreaSizeBytes += SpillAreaPaddingBytes;
  }

  // If there are separate globals and locals areas, make sure the
  // locals area is aligned by padding the end of the globals area.
  uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
  if (LocalsSlotsAlignmentBytes) {
    assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
    GlobalsAndSubsequentPaddingSize =
        applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(RegX8632::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  unsigned NumXmmArgs = 0;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals. Second pass over the sorted
  // spills, mirroring the size-accumulation pass above but now
  // assigning the actual offsets (negative from ebp, or positive from
  // esp, depending on the frame style).
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  LocalsSize.assign(LocalsSize.size(), 0);
  size_t NextStackOffset = GlobalsSpaceUsed;
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
  this->HasComputedFrame = true;

  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }

  // Optionally dump the computed layout for debugging.
  if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
926
// Emits the epilog instructions (frame teardown and callee-save pops)
// immediately before the ret instruction in Node. If Node contains no
// ret, nothing is emitted. Mirrors the frame set up by addProlog().
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  // Search backward for the ret instruction; the epilog is inserted
  // just before it.
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  if (IsEbpBasedFrame) {
    // "mov esp, ebp; pop ebp" restores both stack and frame pointers.
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of
  // the pushes emitted by addProlog().
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    // ebp was already popped above for an ebp-based frame.
    if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}
967
968void TargetX8632::split64(Variable *Var) {
969 switch (Var->getType()) {
970 default:
971 return;
972 case IceType_i64:
973 // TODO: Only consider F64 if we need to push each half when
974 // passing as an argument to a function call. Note that each half
975 // is still typed as I32.
976 case IceType_f64:
977 break;
978 }
979 Variable *Lo = Var->getLo();
980 Variable *Hi = Var->getHi();
981 if (Lo) {
982 assert(Hi);
983 return;
984 }
Jim Stichnothae953202014-12-20 06:17:49 -0800985 assert(Hi == nullptr);
Jim Stichnoth9a04c072014-12-11 15:51:42 -0800986 Lo = Func->makeVariable(IceType_i32);
987 Hi = Func->makeVariable(IceType_i32);
988 if (ALLOW_DUMP) {
989 Lo->setName(Func, Var->getName(Func) + "__lo");
990 Hi->setName(Func, Var->getName(Func) + "__hi");
991 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700992 Var->setLoHi(Lo, Hi);
993 if (Var->getIsArg()) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700994 Lo->setIsArg();
995 Hi->setIsArg();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700996 }
997}
998
999Operand *TargetX8632::loOperand(Operand *Operand) {
1000 assert(Operand->getType() == IceType_i64);
1001 if (Operand->getType() != IceType_i64)
1002 return Operand;
1003 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1004 split64(Var);
1005 return Var->getLo();
1006 }
Jan Voungbc004632014-09-16 15:09:10 -07001007 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001008 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001009 }
1010 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1011 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1012 Mem->getOffset(), Mem->getIndex(),
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001013 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001014 }
1015 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001016 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001017}
1018
1019Operand *TargetX8632::hiOperand(Operand *Operand) {
1020 assert(Operand->getType() == IceType_i64);
1021 if (Operand->getType() != IceType_i64)
1022 return Operand;
1023 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1024 split64(Var);
1025 return Var->getHi();
1026 }
Jan Voungbc004632014-09-16 15:09:10 -07001027 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1028 return Ctx->getConstantInt32(
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001029 static_cast<uint32_t>(Const->getValue() >> 32));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001030 }
1031 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1032 Constant *Offset = Mem->getOffset();
Jim Stichnothae953202014-12-20 06:17:49 -08001033 if (Offset == nullptr) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001034 Offset = Ctx->getConstantInt32(4);
1035 } else if (ConstantInteger32 *IntOffset =
1036 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1037 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001038 } else if (ConstantRelocatable *SymOffset =
1039 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
Jan Voungfe14fb82014-10-13 15:56:32 -07001040 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001041 Offset =
1042 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1043 SymOffset->getSuppressMangling());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001044 }
1045 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001046 Mem->getIndex(), Mem->getShift(),
1047 Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001048 }
1049 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001050 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001051}
1052
// Builds the set of physical registers selected by the Include mask
// and not selected by the Exclude mask. The X-macro expands the
// register table once per register; all Include tests run before all
// Exclude tests, so Exclude takes precedence when a register matches
// both masks.
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
1082
// Lowers an alloca instruction by adjusting esp downward by the
// (alignment-rounded) allocation size and assigning the resulting esp
// value to the instruction's dest. Forces an ebp-based frame since
// esp no longer has a fixed offset from the frame base.
void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
  IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert((AlignmentParam & (AlignmentParam - 1)) == 0);
  assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

  // Use at least the default stack alignment; only realign esp when a
  // stricter alignment was requested. (-Alignment is the all-ones
  // mask with the low log2(Alignment) bits cleared.)
  uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }
  if (ConstantInteger32 *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Constant size: round up at compile time and emit a single sub.
    uint32_t Value = ConstantTotalSize->getValue();
    Value = applyAlignment(Value, Alignment);
    _sub(esp, Ctx->getConstantInt32(Value));
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub(esp, T);
  }
  // The dest gets the new esp, i.e. the base of the allocated region.
  _mov(Dest, esp);
}
1125
1126void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1127 Variable *Dest = Inst->getDest();
1128 Operand *Src0 = legalize(Inst->getSrc(0));
1129 Operand *Src1 = legalize(Inst->getSrc(1));
1130 if (Dest->getType() == IceType_i64) {
1131 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1132 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1133 Operand *Src0Lo = loOperand(Src0);
1134 Operand *Src0Hi = hiOperand(Src0);
1135 Operand *Src1Lo = loOperand(Src1);
1136 Operand *Src1Hi = hiOperand(Src1);
Jim Stichnothae953202014-12-20 06:17:49 -08001137 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001138 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001139 case InstArithmetic::_num:
1140 llvm_unreachable("Unknown arithmetic operator");
1141 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001142 case InstArithmetic::Add:
1143 _mov(T_Lo, Src0Lo);
1144 _add(T_Lo, Src1Lo);
1145 _mov(DestLo, T_Lo);
1146 _mov(T_Hi, Src0Hi);
1147 _adc(T_Hi, Src1Hi);
1148 _mov(DestHi, T_Hi);
1149 break;
1150 case InstArithmetic::And:
1151 _mov(T_Lo, Src0Lo);
1152 _and(T_Lo, Src1Lo);
1153 _mov(DestLo, T_Lo);
1154 _mov(T_Hi, Src0Hi);
1155 _and(T_Hi, Src1Hi);
1156 _mov(DestHi, T_Hi);
1157 break;
1158 case InstArithmetic::Or:
1159 _mov(T_Lo, Src0Lo);
1160 _or(T_Lo, Src1Lo);
1161 _mov(DestLo, T_Lo);
1162 _mov(T_Hi, Src0Hi);
1163 _or(T_Hi, Src1Hi);
1164 _mov(DestHi, T_Hi);
1165 break;
1166 case InstArithmetic::Xor:
1167 _mov(T_Lo, Src0Lo);
1168 _xor(T_Lo, Src1Lo);
1169 _mov(DestLo, T_Lo);
1170 _mov(T_Hi, Src0Hi);
1171 _xor(T_Hi, Src1Hi);
1172 _mov(DestHi, T_Hi);
1173 break;
1174 case InstArithmetic::Sub:
1175 _mov(T_Lo, Src0Lo);
1176 _sub(T_Lo, Src1Lo);
1177 _mov(DestLo, T_Lo);
1178 _mov(T_Hi, Src0Hi);
1179 _sbb(T_Hi, Src1Hi);
1180 _mov(DestHi, T_Hi);
1181 break;
1182 case InstArithmetic::Mul: {
Jim Stichnothae953202014-12-20 06:17:49 -08001183 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jan Voungbd385e42014-09-18 18:18:10 -07001184 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1185 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001186 // gcc does the following:
1187 // a=b*c ==>
1188 // t1 = b.hi; t1 *=(imul) c.lo
1189 // t2 = c.hi; t2 *=(imul) b.lo
1190 // t3:eax = b.lo
1191 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1192 // a.lo = t4.lo
1193 // t4.hi += t1
1194 // t4.hi += t2
1195 // a.hi = t4.hi
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07001196 // The mul instruction cannot take an immediate operand.
1197 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001198 _mov(T_1, Src0Hi);
1199 _imul(T_1, Src1Lo);
1200 _mov(T_2, Src1Hi);
1201 _imul(T_2, Src0Lo);
Jan Voungbd385e42014-09-18 18:18:10 -07001202 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001203 _mul(T_4Lo, T_3, Src1Lo);
1204 // The mul instruction produces two dest variables, edx:eax. We
1205 // create a fake definition of edx to account for this.
1206 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1207 _mov(DestLo, T_4Lo);
1208 _add(T_4Hi, T_1);
1209 _add(T_4Hi, T_2);
1210 _mov(DestHi, T_4Hi);
1211 } break;
1212 case InstArithmetic::Shl: {
1213 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1214 // gcc does the following:
1215 // a=b<<c ==>
1216 // t1:ecx = c.lo & 0xff
1217 // t2 = b.lo
1218 // t3 = b.hi
1219 // t3 = shld t3, t2, t1
1220 // t2 = shl t2, t1
1221 // test t1, 0x20
1222 // je L1
1223 // use(t3)
1224 // t3 = t2
1225 // t2 = 0
1226 // L1:
1227 // a.lo = t2
1228 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001229 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001230 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001231 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001232 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001233 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001234 _mov(T_2, Src0Lo);
1235 _mov(T_3, Src0Hi);
1236 _shld(T_3, T_2, T_1);
1237 _shl(T_2, T_1);
1238 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001239 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001240 // T_2 and T_3 are being assigned again because of the
1241 // intra-block control flow, so we need the _mov_nonkillable
1242 // variant to avoid liveness problems.
1243 _mov_nonkillable(T_3, T_2);
1244 _mov_nonkillable(T_2, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001245 Context.insert(Label);
1246 _mov(DestLo, T_2);
1247 _mov(DestHi, T_3);
1248 } break;
1249 case InstArithmetic::Lshr: {
1250 // a=b>>c (unsigned) ==>
1251 // t1:ecx = c.lo & 0xff
1252 // t2 = b.lo
1253 // t3 = b.hi
1254 // t2 = shrd t2, t3, t1
1255 // t3 = shr t3, t1
1256 // test t1, 0x20
1257 // je L1
1258 // use(t2)
1259 // t2 = t3
1260 // t3 = 0
1261 // L1:
1262 // a.lo = t2
1263 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001264 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001265 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001266 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001267 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001268 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001269 _mov(T_2, Src0Lo);
1270 _mov(T_3, Src0Hi);
1271 _shrd(T_2, T_3, T_1);
1272 _shr(T_3, T_1);
1273 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001274 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001275 // T_2 and T_3 are being assigned again because of the
1276 // intra-block control flow, so we need the _mov_nonkillable
1277 // variant to avoid liveness problems.
1278 _mov_nonkillable(T_2, T_3);
1279 _mov_nonkillable(T_3, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001280 Context.insert(Label);
1281 _mov(DestLo, T_2);
1282 _mov(DestHi, T_3);
1283 } break;
1284 case InstArithmetic::Ashr: {
1285 // a=b>>c (signed) ==>
1286 // t1:ecx = c.lo & 0xff
1287 // t2 = b.lo
1288 // t3 = b.hi
1289 // t2 = shrd t2, t3, t1
1290 // t3 = sar t3, t1
1291 // test t1, 0x20
1292 // je L1
1293 // use(t2)
1294 // t2 = t3
1295 // t3 = sar t3, 0x1f
1296 // L1:
1297 // a.lo = t2
1298 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001299 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001300 Constant *BitTest = Ctx->getConstantInt32(0x20);
1301 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001302 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001303 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001304 _mov(T_2, Src0Lo);
1305 _mov(T_3, Src0Hi);
1306 _shrd(T_2, T_3, T_1);
1307 _sar(T_3, T_1);
1308 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001309 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001310 // T_2 and T_3 are being assigned again because of the
1311 // intra-block control flow, so T_2 needs the _mov_nonkillable
1312 // variant to avoid liveness problems. T_3 doesn't need special
1313 // treatment because it is reassigned via _sar instead of _mov.
1314 _mov_nonkillable(T_2, T_3);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001315 _sar(T_3, SignExtend);
1316 Context.insert(Label);
1317 _mov(DestLo, T_2);
1318 _mov(DestHi, T_3);
1319 } break;
1320 case InstArithmetic::Udiv: {
1321 const SizeT MaxSrcs = 2;
1322 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
1323 Call->addArg(Inst->getSrc(0));
1324 Call->addArg(Inst->getSrc(1));
1325 lowerCall(Call);
1326 } break;
1327 case InstArithmetic::Sdiv: {
1328 const SizeT MaxSrcs = 2;
1329 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
1330 Call->addArg(Inst->getSrc(0));
1331 Call->addArg(Inst->getSrc(1));
1332 lowerCall(Call);
1333 } break;
1334 case InstArithmetic::Urem: {
1335 const SizeT MaxSrcs = 2;
1336 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
1337 Call->addArg(Inst->getSrc(0));
1338 Call->addArg(Inst->getSrc(1));
1339 lowerCall(Call);
1340 } break;
1341 case InstArithmetic::Srem: {
1342 const SizeT MaxSrcs = 2;
1343 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
1344 Call->addArg(Inst->getSrc(0));
1345 Call->addArg(Inst->getSrc(1));
1346 lowerCall(Call);
1347 } break;
1348 case InstArithmetic::Fadd:
1349 case InstArithmetic::Fsub:
1350 case InstArithmetic::Fmul:
1351 case InstArithmetic::Fdiv:
1352 case InstArithmetic::Frem:
1353 llvm_unreachable("FP instruction with i64 type");
1354 break;
1355 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001356 } else if (isVectorType(Dest->getType())) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001357 // TODO: Trap on integer divide and integer modulo by zero.
1358 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
Matt Wala8d1072e2014-07-11 15:43:51 -07001359 switch (Inst->getOp()) {
1360 case InstArithmetic::_num:
1361 llvm_unreachable("Unknown arithmetic operator");
1362 break;
Matt Wala7fa22d82014-07-17 12:41:31 -07001363 case InstArithmetic::Add: {
1364 Variable *T = makeReg(Dest->getType());
1365 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001366 _padd(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001367 _movp(Dest, T);
1368 } break;
1369 case InstArithmetic::And: {
1370 Variable *T = makeReg(Dest->getType());
1371 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001372 _pand(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001373 _movp(Dest, T);
1374 } break;
1375 case InstArithmetic::Or: {
1376 Variable *T = makeReg(Dest->getType());
1377 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001378 _por(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001379 _movp(Dest, T);
1380 } break;
1381 case InstArithmetic::Xor: {
1382 Variable *T = makeReg(Dest->getType());
1383 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001384 _pxor(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001385 _movp(Dest, T);
1386 } break;
1387 case InstArithmetic::Sub: {
1388 Variable *T = makeReg(Dest->getType());
1389 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001390 _psub(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001391 _movp(Dest, T);
1392 } break;
1393 case InstArithmetic::Mul: {
Matt Wala0a450512014-07-30 12:44:39 -07001394 bool TypesAreValidForPmull =
1395 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1396 bool InstructionSetIsValidForPmull =
1397 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1398 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1399 Variable *T = makeReg(Dest->getType());
1400 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001401 _pmull(T, Src1);
Matt Wala0a450512014-07-30 12:44:39 -07001402 _movp(Dest, T);
1403 } else if (Dest->getType() == IceType_v4i32) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001404 // Lowering sequence:
1405 // Note: The mask arguments have index 0 on the left.
1406 //
1407 // movups T1, Src0
1408 // pshufd T2, Src0, {1,0,3,0}
1409 // pshufd T3, Src1, {1,0,3,0}
1410 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1411 // pmuludq T1, Src1
1412 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1413 // pmuludq T2, T3
1414 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1415 // shufps T1, T2, {0,2,0,2}
1416 // pshufd T4, T1, {0,2,1,3}
1417 // movups Dest, T4
Matt Wala7fa22d82014-07-17 12:41:31 -07001418
1419 // Mask that directs pshufd to create a vector with entries
1420 // Src[1, 0, 3, 0]
1421 const unsigned Constant1030 = 0x31;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001422 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
Matt Wala7fa22d82014-07-17 12:41:31 -07001423 // Mask that directs shufps to create a vector with entries
1424 // Dest[0, 2], Src[0, 2]
1425 const unsigned Mask0202 = 0x88;
1426 // Mask that directs pshufd to create a vector with entries
1427 // Src[0, 2, 1, 3]
1428 const unsigned Mask0213 = 0xd8;
1429 Variable *T1 = makeReg(IceType_v4i32);
1430 Variable *T2 = makeReg(IceType_v4i32);
1431 Variable *T3 = makeReg(IceType_v4i32);
1432 Variable *T4 = makeReg(IceType_v4i32);
1433 _movp(T1, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001434 _pshufd(T2, Src0, Mask1030);
1435 _pshufd(T3, Src1, Mask1030);
1436 _pmuludq(T1, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001437 _pmuludq(T2, T3);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001438 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1439 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
Matt Wala7fa22d82014-07-17 12:41:31 -07001440 _movp(Dest, T4);
Matt Wala7fa22d82014-07-17 12:41:31 -07001441 } else {
1442 assert(Dest->getType() == IceType_v16i8);
Matt Walaafeaee42014-08-07 13:47:30 -07001443 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001444 }
1445 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001446 case InstArithmetic::Shl:
1447 case InstArithmetic::Lshr:
1448 case InstArithmetic::Ashr:
1449 case InstArithmetic::Udiv:
1450 case InstArithmetic::Urem:
1451 case InstArithmetic::Sdiv:
1452 case InstArithmetic::Srem:
1453 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1454 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001455 case InstArithmetic::Fadd: {
1456 Variable *T = makeReg(Dest->getType());
1457 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001458 _addps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001459 _movp(Dest, T);
1460 } break;
1461 case InstArithmetic::Fsub: {
1462 Variable *T = makeReg(Dest->getType());
1463 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001464 _subps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001465 _movp(Dest, T);
1466 } break;
1467 case InstArithmetic::Fmul: {
1468 Variable *T = makeReg(Dest->getType());
1469 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001470 _mulps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001471 _movp(Dest, T);
1472 } break;
1473 case InstArithmetic::Fdiv: {
1474 Variable *T = makeReg(Dest->getType());
1475 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001476 _divps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001477 _movp(Dest, T);
1478 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001479 case InstArithmetic::Frem:
1480 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1481 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001482 }
1483 } else { // Dest->getType() is non-i64 scalar
Jim Stichnothae953202014-12-20 06:17:49 -08001484 Variable *T_edx = nullptr;
1485 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001486 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001487 case InstArithmetic::_num:
1488 llvm_unreachable("Unknown arithmetic operator");
1489 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001490 case InstArithmetic::Add:
1491 _mov(T, Src0);
1492 _add(T, Src1);
1493 _mov(Dest, T);
1494 break;
1495 case InstArithmetic::And:
1496 _mov(T, Src0);
1497 _and(T, Src1);
1498 _mov(Dest, T);
1499 break;
1500 case InstArithmetic::Or:
1501 _mov(T, Src0);
1502 _or(T, Src1);
1503 _mov(Dest, T);
1504 break;
1505 case InstArithmetic::Xor:
1506 _mov(T, Src0);
1507 _xor(T, Src1);
1508 _mov(Dest, T);
1509 break;
1510 case InstArithmetic::Sub:
1511 _mov(T, Src0);
1512 _sub(T, Src1);
1513 _mov(Dest, T);
1514 break;
1515 case InstArithmetic::Mul:
1516 // TODO: Optimize for llvm::isa<Constant>(Src1)
1517 // TODO: Strength-reduce multiplications by a constant,
1518 // particularly -1 and powers of 2. Advanced: use lea to
1519 // multiply by 3, 5, 9.
1520 //
1521 // The 8-bit version of imul only allows the form "imul r/m8"
1522 // where T must be in eax.
Jan Voung0ac50dc2014-09-30 08:36:06 -07001523 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001524 _mov(T, Src0, RegX8632::Reg_eax);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001525 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1526 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001527 _mov(T, Src0);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001528 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001529 _imul(T, Src1);
1530 _mov(Dest, T);
1531 break;
1532 case InstArithmetic::Shl:
1533 _mov(T, Src0);
1534 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001535 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001536 _shl(T, Src1);
1537 _mov(Dest, T);
1538 break;
1539 case InstArithmetic::Lshr:
1540 _mov(T, Src0);
1541 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001542 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001543 _shr(T, Src1);
1544 _mov(Dest, T);
1545 break;
1546 case InstArithmetic::Ashr:
1547 _mov(T, Src0);
1548 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001549 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001550 _sar(T, Src1);
1551 _mov(Dest, T);
1552 break;
1553 case InstArithmetic::Udiv:
Jan Voung70d68832014-06-17 10:02:37 -07001554 // div and idiv are the few arithmetic operators that do not allow
1555 // immediates as the operand.
1556 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001557 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnothae953202014-12-20 06:17:49 -08001558 Variable *T_ah = nullptr;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001559 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001560 _mov(T, Src0, RegX8632::Reg_eax);
1561 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001562 _div(T, Src1, T_ah);
1563 _mov(Dest, T);
1564 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001565 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001566 _mov(T, Src0, RegX8632::Reg_eax);
1567 _mov(T_edx, Zero, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001568 _div(T, Src1, T_edx);
1569 _mov(Dest, T);
1570 }
1571 break;
1572 case InstArithmetic::Sdiv:
Jan Voung70d68832014-06-17 10:02:37 -07001573 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001574 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001575 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001576 _cbwdq(T, T);
1577 _idiv(T, Src1, T);
1578 _mov(Dest, T);
1579 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001580 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1581 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001582 _cbwdq(T_edx, T);
1583 _idiv(T, Src1, T_edx);
1584 _mov(Dest, T);
1585 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001586 break;
1587 case InstArithmetic::Urem:
Jan Voung70d68832014-06-17 10:02:37 -07001588 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001589 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnothae953202014-12-20 06:17:49 -08001590 Variable *T_ah = nullptr;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001591 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001592 _mov(T, Src0, RegX8632::Reg_eax);
1593 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001594 _div(T_ah, Src1, T);
1595 _mov(Dest, T_ah);
1596 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001597 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001598 _mov(T_edx, Zero, RegX8632::Reg_edx);
1599 _mov(T, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001600 _div(T_edx, Src1, T);
1601 _mov(Dest, T_edx);
1602 }
1603 break;
1604 case InstArithmetic::Srem:
Jan Voung70d68832014-06-17 10:02:37 -07001605 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001606 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001607 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1608 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001609 _cbwdq(T, T);
1610 Context.insert(InstFakeDef::create(Func, T_ah));
1611 _idiv(T_ah, Src1, T);
1612 _mov(Dest, T_ah);
1613 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001614 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1615 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001616 _cbwdq(T_edx, T);
1617 _idiv(T_edx, Src1, T);
1618 _mov(Dest, T_edx);
1619 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001620 break;
1621 case InstArithmetic::Fadd:
1622 _mov(T, Src0);
1623 _addss(T, Src1);
1624 _mov(Dest, T);
1625 break;
1626 case InstArithmetic::Fsub:
1627 _mov(T, Src0);
1628 _subss(T, Src1);
1629 _mov(Dest, T);
1630 break;
1631 case InstArithmetic::Fmul:
1632 _mov(T, Src0);
1633 _mulss(T, Src1);
1634 _mov(Dest, T);
1635 break;
1636 case InstArithmetic::Fdiv:
1637 _mov(T, Src0);
1638 _divss(T, Src1);
1639 _mov(Dest, T);
1640 break;
1641 case InstArithmetic::Frem: {
1642 const SizeT MaxSrcs = 2;
1643 Type Ty = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07001644 InstCall *Call = makeHelperCall(
1645 isFloat32Asserting32Or64(Ty) ? "fmodf" : "fmod", Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001646 Call->addArg(Src0);
1647 Call->addArg(Src1);
1648 return lowerCall(Call);
1649 } break;
1650 }
1651 }
1652}
1653
1654void TargetX8632::lowerAssign(const InstAssign *Inst) {
1655 Variable *Dest = Inst->getDest();
1656 Operand *Src0 = Inst->getSrc(0);
1657 assert(Dest->getType() == Src0->getType());
1658 if (Dest->getType() == IceType_i64) {
1659 Src0 = legalize(Src0);
1660 Operand *Src0Lo = loOperand(Src0);
1661 Operand *Src0Hi = hiOperand(Src0);
1662 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1663 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Jim Stichnothae953202014-12-20 06:17:49 -08001664 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001665 _mov(T_Lo, Src0Lo);
1666 _mov(DestLo, T_Lo);
1667 _mov(T_Hi, Src0Hi);
1668 _mov(DestHi, T_Hi);
1669 } else {
Jim Stichnoth336f6c42014-10-30 15:01:31 -07001670 // If Dest is in memory, then RI is either a physical register or
1671 // an immediate, otherwise RI can be anything.
1672 Operand *RI =
1673 legalize(Src0, Dest->hasReg() ? Legal_All : Legal_Reg | Legal_Imm);
Matt Wala45a06232014-07-09 16:33:22 -07001674 if (isVectorType(Dest->getType()))
1675 _movp(Dest, RI);
1676 else
1677 _mov(Dest, RI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001678 }
1679}
1680
1681void TargetX8632::lowerBr(const InstBr *Inst) {
1682 if (Inst->isUnconditional()) {
1683 _br(Inst->getTargetUnconditional());
1684 } else {
Jim Stichnoth206833c2014-08-07 10:58:05 -07001685 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001686 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001687 _cmp(Src0, Zero);
Jan Voungbd385e42014-09-18 18:18:10 -07001688 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001689 }
1690}
1691
1692void TargetX8632::lowerCall(const InstCall *Instr) {
Matt Wala105b7042014-08-11 19:56:19 -07001693 // x86-32 calling convention:
1694 //
1695 // * At the point before the call, the stack must be aligned to 16
1696 // bytes.
1697 //
1698 // * The first four arguments of vector type, regardless of their
1699 // position relative to the other arguments in the argument list, are
1700 // placed in registers xmm0 - xmm3.
1701 //
1702 // * Other arguments are pushed onto the stack in right-to-left order,
1703 // such that the left-most argument ends up on the top of the stack at
1704 // the lowest memory address.
1705 //
1706 // * Stack arguments of vector type are aligned to start at the next
1707 // highest multiple of 16 bytes. Other stack arguments are aligned to
1708 // 4 bytes.
1709 //
1710 // This intends to match the section "IA-32 Function Calling
1711 // Convention" of the document "OS X ABI Function Call Guide" by
1712 // Apple.
1713 NeedsStackAlignment = true;
1714
Jim Stichnoth1502e592014-12-11 09:22:45 -08001715 typedef std::vector<Operand *> OperandList;
Matt Wala105b7042014-08-11 19:56:19 -07001716 OperandList XmmArgs;
1717 OperandList StackArgs, StackArgLocations;
1718 uint32_t ParameterAreaSizeBytes = 0;
1719
Matt Wala45a06232014-07-09 16:33:22 -07001720 // Classify each argument operand according to the location where the
1721 // argument is passed.
Matt Wala45a06232014-07-09 16:33:22 -07001722 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
1723 Operand *Arg = Instr->getArg(i);
Matt Wala105b7042014-08-11 19:56:19 -07001724 Type Ty = Arg->getType();
1725 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
Jan Voung3a569182014-09-29 10:16:01 -07001726 assert(typeWidthInBytes(Ty) >= 4);
Matt Wala105b7042014-08-11 19:56:19 -07001727 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
Matt Wala45a06232014-07-09 16:33:22 -07001728 XmmArgs.push_back(Arg);
1729 } else {
1730 StackArgs.push_back(Arg);
Matt Wala105b7042014-08-11 19:56:19 -07001731 if (isVectorType(Arg->getType())) {
1732 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1733 }
Jan Voungbd385e42014-09-18 18:18:10 -07001734 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001735 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
Matt Wala105b7042014-08-11 19:56:19 -07001736 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1737 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
Matt Wala45a06232014-07-09 16:33:22 -07001738 }
1739 }
Matt Wala105b7042014-08-11 19:56:19 -07001740
1741 // Adjust the parameter area so that the stack is aligned. It is
1742 // assumed that the stack is already aligned at the start of the
1743 // calling sequence.
1744 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1745
1746 // Subtract the appropriate amount for the argument area. This also
1747 // takes care of setting the stack adjustment during emission.
Matt Wala45a06232014-07-09 16:33:22 -07001748 //
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001749 // TODO: If for some reason the call instruction gets dead-code
1750 // eliminated after lowering, we would need to ensure that the
Matt Wala105b7042014-08-11 19:56:19 -07001751 // pre-call and the post-call esp adjustment get eliminated as well.
1752 if (ParameterAreaSizeBytes) {
1753 _adjust_stack(ParameterAreaSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001754 }
Matt Wala105b7042014-08-11 19:56:19 -07001755
1756 // Copy arguments that are passed on the stack to the appropriate
1757 // stack locations.
1758 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
1759 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
Matt Wala105b7042014-08-11 19:56:19 -07001760 }
1761
Matt Wala45a06232014-07-09 16:33:22 -07001762 // Copy arguments to be passed in registers to the appropriate
1763 // registers.
1764 // TODO: Investigate the impact of lowering arguments passed in
1765 // registers after lowering stack arguments as opposed to the other
1766 // way around. Lowering register arguments after stack arguments may
1767 // reduce register pressure. On the other hand, lowering register
1768 // arguments first (before stack arguments) may result in more compact
1769 // code, as the memory operand displacements may end up being smaller
1770 // before any stack adjustment is done.
1771 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Jim Stichnothad403532014-09-25 12:44:17 -07001772 Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
Matt Wala45a06232014-07-09 16:33:22 -07001773 // Generate a FakeUse of register arguments so that they do not get
1774 // dead code eliminated as a result of the FakeKill of scratch
1775 // registers after the call.
1776 Context.insert(InstFakeUse::create(Func, Reg));
1777 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001778 // Generate the call instruction. Assign its result to a temporary
1779 // with high register allocation weight.
1780 Variable *Dest = Instr->getDest();
Matt Wala45a06232014-07-09 16:33:22 -07001781 // ReturnReg doubles as ReturnRegLo as necessary.
Jim Stichnothae953202014-12-20 06:17:49 -08001782 Variable *ReturnReg = nullptr;
1783 Variable *ReturnRegHi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001784 if (Dest) {
1785 switch (Dest->getType()) {
1786 case IceType_NUM:
1787 llvm_unreachable("Invalid Call dest type");
1788 break;
1789 case IceType_void:
1790 break;
1791 case IceType_i1:
1792 case IceType_i8:
1793 case IceType_i16:
1794 case IceType_i32:
Jan Voungbd385e42014-09-18 18:18:10 -07001795 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001796 break;
1797 case IceType_i64:
Jan Voungbd385e42014-09-18 18:18:10 -07001798 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
1799 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001800 break;
1801 case IceType_f32:
1802 case IceType_f64:
Jim Stichnothae953202014-12-20 06:17:49 -08001803 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
Matt Wala45a06232014-07-09 16:33:22 -07001804 // the fstp instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001805 break;
Matt Wala928f1292014-07-07 16:50:46 -07001806 case IceType_v4i1:
1807 case IceType_v8i1:
1808 case IceType_v16i1:
1809 case IceType_v16i8:
1810 case IceType_v8i16:
1811 case IceType_v4i32:
Matt Wala45a06232014-07-09 16:33:22 -07001812 case IceType_v4f32:
Jan Voungbd385e42014-09-18 18:18:10 -07001813 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
Matt Wala45a06232014-07-09 16:33:22 -07001814 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001815 }
1816 }
Jim Stichnothdd165072014-11-02 09:41:45 -08001817 Operand *CallTarget = legalize(Instr->getCallTarget());
Matt Wala45a06232014-07-09 16:33:22 -07001818 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001819 Context.insert(NewCall);
Matt Wala45a06232014-07-09 16:33:22 -07001820 if (ReturnRegHi)
1821 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001822
Matt Wala105b7042014-08-11 19:56:19 -07001823 // Add the appropriate offset to esp. The call instruction takes care
1824 // of resetting the stack offset during emission.
1825 if (ParameterAreaSizeBytes) {
Jan Voungbd385e42014-09-18 18:18:10 -07001826 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001827 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001828 }
1829
1830 // Insert a register-kill pseudo instruction.
Jim Stichnoth87ff3a12014-11-14 10:27:29 -08001831 Context.insert(InstFakeKill::create(Func, NewCall));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001832
1833 // Generate a FakeUse to keep the call live if necessary.
Matt Wala45a06232014-07-09 16:33:22 -07001834 if (Instr->hasSideEffects() && ReturnReg) {
1835 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001836 Context.insert(FakeUse);
1837 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001838
Matt Wala45a06232014-07-09 16:33:22 -07001839 if (!Dest)
1840 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001841
Matt Wala45a06232014-07-09 16:33:22 -07001842 // Assign the result of the call to Dest.
1843 if (ReturnReg) {
1844 if (ReturnRegHi) {
1845 assert(Dest->getType() == IceType_i64);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001846 split64(Dest);
1847 Variable *DestLo = Dest->getLo();
1848 Variable *DestHi = Dest->getHi();
Matt Wala45a06232014-07-09 16:33:22 -07001849 _mov(DestLo, ReturnReg);
1850 _mov(DestHi, ReturnRegHi);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001851 } else {
Matt Wala45a06232014-07-09 16:33:22 -07001852 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
1853 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
1854 isVectorType(Dest->getType()));
Matt Wala45a06232014-07-09 16:33:22 -07001855 if (isVectorType(Dest->getType())) {
1856 _movp(Dest, ReturnReg);
1857 } else {
1858 _mov(Dest, ReturnReg);
1859 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001860 }
Jan Voung3a569182014-09-29 10:16:01 -07001861 } else if (isScalarFloatingType(Dest->getType())) {
Matt Wala45a06232014-07-09 16:33:22 -07001862 // Special treatment for an FP function which returns its result in
1863 // st(0).
Matt Wala45a06232014-07-09 16:33:22 -07001864 // If Dest ends up being a physical xmm register, the fstp emit code
1865 // will route st(0) through a temporary stack slot.
Jim Stichnotha5229722014-09-12 13:06:09 -07001866 _fstp(Dest);
1867 // Create a fake use of Dest in case it actually isn't used,
1868 // because st(0) still needs to be popped.
1869 Context.insert(InstFakeUse::create(Func, Dest));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001870 }
1871}
1872
1873void TargetX8632::lowerCast(const InstCast *Inst) {
1874 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1875 InstCast::OpKind CastKind = Inst->getCastKind();
1876 Variable *Dest = Inst->getDest();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001877 switch (CastKind) {
1878 default:
1879 Func->setError("Cast type not supported");
1880 return;
Jan Voung1ee34162014-06-24 13:43:30 -07001881 case InstCast::Sext: {
1882 // Src0RM is the source operand legalized to physical register or memory,
1883 // but not immediate, since the relevant x86 native instructions don't
1884 // allow an immediate operand. If the operand is an immediate, we could
1885 // consider computing the strength-reduced result at translation time,
1886 // but we're unlikely to see something like that in the bitcode that
1887 // the optimizer wouldn't have already taken care of.
1888 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001889 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07001890 Type DestTy = Dest->getType();
1891 if (DestTy == IceType_v16i8) {
1892 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
1893 Variable *OneMask = makeVectorOfOnes(Dest->getType());
1894 Variable *T = makeReg(DestTy);
1895 _movp(T, Src0RM);
1896 _pand(T, OneMask);
1897 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1898 _pcmpgt(T, Zeros);
1899 _movp(Dest, T);
1900 } else {
1901 // width = width(elty) - 1; dest = (src << width) >> width
1902 SizeT ShiftAmount =
1903 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001904 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
Matt Wala83b80362014-07-16 10:21:30 -07001905 Variable *T = makeReg(DestTy);
1906 _movp(T, Src0RM);
1907 _psll(T, ShiftConstant);
1908 _psra(T, ShiftConstant);
1909 _movp(Dest, T);
1910 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07001911 } else if (Dest->getType() == IceType_i64) {
1912 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001913 Constant *Shift = Ctx->getConstantInt32(31);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001914 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1915 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1916 Variable *T_Lo = makeReg(DestLo->getType());
1917 if (Src0RM->getType() == IceType_i32) {
1918 _mov(T_Lo, Src0RM);
1919 } else if (Src0RM->getType() == IceType_i1) {
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001920 _movzx(T_Lo, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001921 _shl(T_Lo, Shift);
1922 _sar(T_Lo, Shift);
1923 } else {
1924 _movsx(T_Lo, Src0RM);
1925 }
1926 _mov(DestLo, T_Lo);
Jim Stichnothae953202014-12-20 06:17:49 -08001927 Variable *T_Hi = nullptr;
Jim Stichnothdd30c812014-09-04 16:39:02 -07001928 _mov(T_Hi, T_Lo);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001929 if (Src0RM->getType() != IceType_i1)
1930 // For i1, the sar instruction is already done above.
1931 _sar(T_Hi, Shift);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001932 _mov(DestHi, T_Hi);
1933 } else if (Src0RM->getType() == IceType_i1) {
1934 // t1 = src
1935 // shl t1, dst_bitwidth - 1
1936 // sar t1, dst_bitwidth - 1
1937 // dst = t1
1938 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001939 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001940 Variable *T = makeReg(Dest->getType());
1941 if (typeWidthInBytes(Dest->getType()) <=
1942 typeWidthInBytes(Src0RM->getType())) {
1943 _mov(T, Src0RM);
1944 } else {
1945 // Widen the source using movsx or movzx. (It doesn't matter
1946 // which one, since the following shl/sar overwrite the bits.)
1947 _movzx(T, Src0RM);
1948 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07001949 _shl(T, ShiftAmount);
1950 _sar(T, ShiftAmount);
1951 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001952 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001953 // t1 = movsx src; dst = t1
1954 Variable *T = makeReg(Dest->getType());
1955 _movsx(T, Src0RM);
1956 _mov(Dest, T);
1957 }
1958 break;
Jan Voung1ee34162014-06-24 13:43:30 -07001959 }
1960 case InstCast::Zext: {
1961 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001962 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07001963 // onemask = materialize(1,1,...); dest = onemask & src
1964 Type DestTy = Dest->getType();
1965 Variable *OneMask = makeVectorOfOnes(DestTy);
1966 Variable *T = makeReg(DestTy);
1967 _movp(T, Src0RM);
1968 _pand(T, OneMask);
1969 _movp(Dest, T);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001970 } else if (Dest->getType() == IceType_i64) {
1971 // t1=movzx src; dst.lo=t1; dst.hi=0
1972 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1973 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1974 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1975 Variable *Tmp = makeReg(DestLo->getType());
1976 if (Src0RM->getType() == IceType_i32) {
1977 _mov(Tmp, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07001978 } else {
1979 _movzx(Tmp, Src0RM);
1980 }
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001981 if (Src0RM->getType() == IceType_i1) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001982 Constant *One = Ctx->getConstantInt32(1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07001983 _and(Tmp, One);
1984 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07001985 _mov(DestLo, Tmp);
1986 _mov(DestHi, Zero);
1987 } else if (Src0RM->getType() == IceType_i1) {
1988 // t = Src0RM; t &= 1; Dest = t
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001989 Constant *One = Ctx->getConstantInt32(1);
Jan Voung39d4aca2014-10-15 15:16:54 -07001990 Type DestTy = Dest->getType();
1991 Variable *T;
1992 if (DestTy == IceType_i8) {
1993 T = makeReg(DestTy);
1994 _mov(T, Src0RM);
1995 } else {
1996 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
1997 T = makeReg(IceType_i32);
1998 _movzx(T, Src0RM);
1999 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002000 _and(T, One);
2001 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002002 } else {
2003 // t1 = movzx src; dst = t1
2004 Variable *T = makeReg(Dest->getType());
2005 _movzx(T, Src0RM);
2006 _mov(Dest, T);
2007 }
2008 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002009 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002010 case InstCast::Trunc: {
Matt Wala83b80362014-07-16 10:21:30 -07002011 if (isVectorType(Dest->getType())) {
2012 // onemask = materialize(1,1,...); dst = src & onemask
2013 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2014 Type Src0Ty = Src0RM->getType();
2015 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2016 Variable *T = makeReg(Dest->getType());
2017 _movp(T, Src0RM);
2018 _pand(T, OneMask);
2019 _movp(Dest, T);
2020 } else {
2021 Operand *Src0 = Inst->getSrc(0);
2022 if (Src0->getType() == IceType_i64)
2023 Src0 = loOperand(Src0);
2024 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2025 // t1 = trunc Src0RM; Dest = t1
Jim Stichnothae953202014-12-20 06:17:49 -08002026 Variable *T = nullptr;
Matt Wala83b80362014-07-16 10:21:30 -07002027 _mov(T, Src0RM);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002028 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002029 _and(T, Ctx->getConstantInt1(1));
Matt Wala83b80362014-07-16 10:21:30 -07002030 _mov(Dest, T);
2031 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002032 break;
2033 }
2034 case InstCast::Fptrunc:
2035 case InstCast::Fpext: {
Jan Voung1ee34162014-06-24 13:43:30 -07002036 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002037 // t1 = cvt Src0RM; Dest = t1
2038 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002039 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002040 _mov(Dest, T);
2041 break;
2042 }
2043 case InstCast::Fptosi:
Matt Wala83b80362014-07-16 10:21:30 -07002044 if (isVectorType(Dest->getType())) {
2045 assert(Dest->getType() == IceType_v4i32 &&
2046 Inst->getSrc(0)->getType() == IceType_v4f32);
2047 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2048 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002049 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
Matt Wala83b80362014-07-16 10:21:30 -07002050 _movp(Dest, T);
2051 } else if (Dest->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002052 // Use a helper for converting floating-point values to 64-bit
2053 // integers. SSE2 appears to have no way to convert from xmm
2054 // registers to something like the edx:eax register pair, and
2055 // gcc and clang both want to use x87 instructions complete with
2056 // temporary manipulation of the status word. This helper is
2057 // not needed for x86-64.
2058 split64(Dest);
2059 const SizeT MaxSrcs = 1;
2060 Type SrcType = Inst->getSrc(0)->getType();
2061 InstCall *Call = makeHelperCall(
Jan Voung3a569182014-09-29 10:16:01 -07002062 isFloat32Asserting32Or64(SrcType) ? "cvtftosi64" : "cvtdtosi64", Dest,
2063 MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002064 // TODO: Call the correct compiler-rt helper function.
2065 Call->addArg(Inst->getSrc(0));
2066 lowerCall(Call);
2067 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002068 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002069 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2070 Variable *T_1 = makeReg(IceType_i32);
2071 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002072 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002073 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002074 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002075 _and(T_2, Ctx->getConstantInt1(1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002076 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002077 }
2078 break;
2079 case InstCast::Fptoui:
Matt Wala83b80362014-07-16 10:21:30 -07002080 if (isVectorType(Dest->getType())) {
2081 assert(Dest->getType() == IceType_v4i32 &&
2082 Inst->getSrc(0)->getType() == IceType_v4f32);
2083 const SizeT MaxSrcs = 1;
2084 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
2085 Call->addArg(Inst->getSrc(0));
2086 lowerCall(Call);
2087 } else if (Dest->getType() == IceType_i64 ||
2088 Dest->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002089 // Use a helper for both x86-32 and x86-64.
2090 split64(Dest);
2091 const SizeT MaxSrcs = 1;
2092 Type DestType = Dest->getType();
Jan Voung1ee34162014-06-24 13:43:30 -07002093 Type SrcType = Inst->getSrc(0)->getType();
Jan Voung3a569182014-09-29 10:16:01 -07002094 IceString DstSubstring = (isInt32Asserting32Or64(DestType) ? "32" : "64");
2095 IceString SrcSubstring = (isFloat32Asserting32Or64(SrcType) ? "f" : "d");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002096 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
2097 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
2098 // TODO: Call the correct compiler-rt helper function.
2099 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2100 Call->addArg(Inst->getSrc(0));
2101 lowerCall(Call);
2102 return;
2103 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002104 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002105 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2106 Variable *T_1 = makeReg(IceType_i32);
2107 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002108 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002109 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002110 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002111 _and(T_2, Ctx->getConstantInt1(1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002112 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002113 }
2114 break;
2115 case InstCast::Sitofp:
Matt Wala83b80362014-07-16 10:21:30 -07002116 if (isVectorType(Dest->getType())) {
2117 assert(Dest->getType() == IceType_v4f32 &&
2118 Inst->getSrc(0)->getType() == IceType_v4i32);
2119 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2120 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002121 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
Matt Wala83b80362014-07-16 10:21:30 -07002122 _movp(Dest, T);
2123 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002124 // Use a helper for x86-32.
2125 const SizeT MaxSrcs = 1;
2126 Type DestType = Dest->getType();
2127 InstCall *Call = makeHelperCall(
Jan Voung3a569182014-09-29 10:16:01 -07002128 isFloat32Asserting32Or64(DestType) ? "cvtsi64tof" : "cvtsi64tod",
2129 Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002130 // TODO: Call the correct compiler-rt helper function.
2131 Call->addArg(Inst->getSrc(0));
2132 lowerCall(Call);
2133 return;
2134 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002135 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002136 // Sign-extend the operand.
2137 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2138 Variable *T_1 = makeReg(IceType_i32);
2139 Variable *T_2 = makeReg(Dest->getType());
2140 if (Src0RM->getType() == IceType_i32)
2141 _mov(T_1, Src0RM);
2142 else
2143 _movsx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002144 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002145 _mov(Dest, T_2);
2146 }
2147 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002148 case InstCast::Uitofp: {
2149 Operand *Src0 = Inst->getSrc(0);
Matt Wala83b80362014-07-16 10:21:30 -07002150 if (isVectorType(Src0->getType())) {
2151 assert(Dest->getType() == IceType_v4f32 &&
2152 Src0->getType() == IceType_v4i32);
2153 const SizeT MaxSrcs = 1;
2154 InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
2155 Call->addArg(Src0);
2156 lowerCall(Call);
2157 } else if (Src0->getType() == IceType_i64 ||
2158 Src0->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002159 // Use a helper for x86-32 and x86-64. Also use a helper for
2160 // i32 on x86-32.
2161 const SizeT MaxSrcs = 1;
2162 Type DestType = Dest->getType();
Jan Voung3a569182014-09-29 10:16:01 -07002163 IceString SrcSubstring =
2164 (isInt32Asserting32Or64(Src0->getType()) ? "32" : "64");
2165 IceString DstSubstring = (isFloat32Asserting32Or64(DestType) ? "f" : "d");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002166 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
2167 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
2168 // TODO: Call the correct compiler-rt helper function.
2169 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
Jan Voung1ee34162014-06-24 13:43:30 -07002170 Call->addArg(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002171 lowerCall(Call);
2172 return;
2173 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002174 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002175 // Zero-extend the operand.
2176 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2177 Variable *T_1 = makeReg(IceType_i32);
2178 Variable *T_2 = makeReg(Dest->getType());
2179 if (Src0RM->getType() == IceType_i32)
2180 _mov(T_1, Src0RM);
2181 else
2182 _movzx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002183 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002184 _mov(Dest, T_2);
2185 }
2186 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002187 }
2188 case InstCast::Bitcast: {
2189 Operand *Src0 = Inst->getSrc(0);
2190 if (Dest->getType() == Src0->getType()) {
2191 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002192 lowerAssign(Assign);
2193 return;
2194 }
2195 switch (Dest->getType()) {
2196 default:
2197 llvm_unreachable("Unexpected Bitcast dest type");
Matt Wala83b80362014-07-16 10:21:30 -07002198 case IceType_i8: {
2199 assert(Src0->getType() == IceType_v8i1);
2200 InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
2201 Call->addArg(Src0);
2202 lowerCall(Call);
2203 } break;
2204 case IceType_i16: {
2205 assert(Src0->getType() == IceType_v16i1);
2206 InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
2207 Call->addArg(Src0);
2208 lowerCall(Call);
2209 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002210 case IceType_i32:
2211 case IceType_f32: {
Jan Voung1ee34162014-06-24 13:43:30 -07002212 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002213 Type DestType = Dest->getType();
2214 Type SrcType = Src0RM->getType();
Jim Stichnoth6e992142014-07-30 14:45:20 -07002215 (void)DestType;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002216 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2217 (DestType == IceType_f32 && SrcType == IceType_i32));
2218 // a.i32 = bitcast b.f32 ==>
2219 // t.f32 = b.f32
2220 // s.f32 = spill t.f32
2221 // a.i32 = s.f32
Jim Stichnothae953202014-12-20 06:17:49 -08002222 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002223 // TODO: Should be able to force a spill setup by calling legalize() with
2224 // Legal_Mem and not Legal_Reg or Legal_Imm.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002225 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002226 SpillVar->setLinkedTo(Dest);
2227 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002228 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002229 _mov(T, Src0RM);
2230 _mov(Spill, T);
2231 _mov(Dest, Spill);
2232 } break;
2233 case IceType_i64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002234 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002235 assert(Src0RM->getType() == IceType_f64);
2236 // a.i64 = bitcast b.f64 ==>
2237 // s.f64 = spill b.f64
2238 // t_lo.i32 = lo(s.f64)
2239 // a_lo.i32 = t_lo.i32
2240 // t_hi.i32 = hi(s.f64)
2241 // a_hi.i32 = t_hi.i32
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002242 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002243 SpillVar->setLinkedTo(llvm::dyn_cast<Variable>(Src0RM));
2244 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002245 Spill->setWeight(RegWeight::Zero);
Jan Voung5cd240d2014-06-25 10:36:46 -07002246 _movq(Spill, Src0RM);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002247
2248 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2249 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2250 Variable *T_Lo = makeReg(IceType_i32);
2251 Variable *T_Hi = makeReg(IceType_i32);
2252 VariableSplit *SpillLo =
2253 VariableSplit::create(Func, Spill, VariableSplit::Low);
2254 VariableSplit *SpillHi =
2255 VariableSplit::create(Func, Spill, VariableSplit::High);
2256
2257 _mov(T_Lo, SpillLo);
2258 _mov(DestLo, T_Lo);
2259 _mov(T_Hi, SpillHi);
2260 _mov(DestHi, T_Hi);
2261 } break;
2262 case IceType_f64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002263 Src0 = legalize(Src0);
2264 assert(Src0->getType() == IceType_i64);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002265 // a.f64 = bitcast b.i64 ==>
2266 // t_lo.i32 = b_lo.i32
Jan Voung1ee34162014-06-24 13:43:30 -07002267 // FakeDef(s.f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002268 // lo(s.f64) = t_lo.i32
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002269 // t_hi.i32 = b_hi.i32
2270 // hi(s.f64) = t_hi.i32
2271 // a.f64 = s.f64
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002272 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002273 SpillVar->setLinkedTo(Dest);
2274 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002275 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002276
Jim Stichnothae953202014-12-20 06:17:49 -08002277 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002278 VariableSplit *SpillLo =
2279 VariableSplit::create(Func, Spill, VariableSplit::Low);
2280 VariableSplit *SpillHi =
2281 VariableSplit::create(Func, Spill, VariableSplit::High);
Jan Voung1ee34162014-06-24 13:43:30 -07002282 _mov(T_Lo, loOperand(Src0));
2283 // Technically, the Spill is defined after the _store happens, but
2284 // SpillLo is considered a "use" of Spill so define Spill before it
2285 // is used.
2286 Context.insert(InstFakeDef::create(Func, Spill));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002287 _store(T_Lo, SpillLo);
Jan Voung1ee34162014-06-24 13:43:30 -07002288 _mov(T_Hi, hiOperand(Src0));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002289 _store(T_Hi, SpillHi);
Jan Voung5cd240d2014-06-25 10:36:46 -07002290 _movq(Dest, Spill);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002291 } break;
Matt Wala83b80362014-07-16 10:21:30 -07002292 case IceType_v8i1: {
2293 assert(Src0->getType() == IceType_i8);
2294 InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002295 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002296 // Arguments to functions are required to be at least 32 bits wide.
2297 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2298 Call->addArg(Src0AsI32);
2299 lowerCall(Call);
2300 } break;
2301 case IceType_v16i1: {
2302 assert(Src0->getType() == IceType_i16);
2303 InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002304 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002305 // Arguments to functions are required to be at least 32 bits wide.
2306 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2307 Call->addArg(Src0AsI32);
2308 lowerCall(Call);
2309 } break;
2310 case IceType_v8i16:
2311 case IceType_v16i8:
2312 case IceType_v4i32:
2313 case IceType_v4f32: {
2314 _movp(Dest, legalizeToVar(Src0));
2315 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002316 }
2317 break;
2318 }
Jan Voung1ee34162014-06-24 13:43:30 -07002319 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002320}
2321
// Lowers an extractelement instruction: pull a single scalar element out of
// a vector operand at a compile-time-constant index, then copy it into the
// instruction's scalar destination.  Three strategies are used depending on
// the vector type and the available instruction set:
//   1. pextrb/pextrw/pextrd when legal (v8i16/v8i1 always; others on SSE4.1+),
//   2. pshufd + movd/movss for 4-element vectors,
//   3. spill to a stack slot and load the element from memory (16x8 types).
void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  // InVectorElementTy is the element type as actually held in the xmm
  // register (e.g. i1 vector elements are widened in-register), which may
  // differ from the nominal IR element type ElementTy.
  Type InVectorElementTy = getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  bool CanUsePextr =
      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Use pextrb, pextrw, or pextrd.  The immediate Mask selects which
    // element to extract.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToVar(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      // Element 0 of T moves straight into a GPR.
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination.  _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    // Zero weight keeps the register allocator from assigning the slot a
    // register; it must live on the stack so it can be addressed.
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}
2397
// Lowers a floating-point compare.  The 16 fcmp conditions are table-driven
// via TableFcmp (defined near the top of this file): each entry supplies
// operand-swap flags, a cmpps predicate for the vector path, and up to two
// scalar branch conditions (C1/C2) plus a default boolean result.
//
// Vector path: produce an all-ones/all-zeros mask per lane with cmpps,
// combining two compares for the conditions SSE cannot express directly
// (One = neq AND ord; Ueq = eq OR unord).
// Scalar path: ucomiss/ucomisd sets flags, then the result is materialized
// with a mov of the default value followed by conditional branches around a
// mov of the opposite value.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    // Some conditions are only expressible with operands reversed (e.g.
    // "greater" via the "less" predicate); the table records when to swap.
    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = nullptr;

    if (Condition == InstFcmp::True) {
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

      switch (Condition) {
      default: {
        // Single-cmpps conditions: use the predicate from the table.
        CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
        assert(Predicate != CondX86::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check both equal or unordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    // A following sext of the i1 mask result would be redundant since the
    // lanes are already all-ones/all-zeros; fold it away.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands) {
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  // C1 == Br_None means the result is a constant (fcmp true/false): only the
  // "mov a, <default>" below is emitted, with no compare or branches.
  bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = nullptr;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // _mov_nonkillable keeps Dest's earlier definition live, since this mov
    // is only conditionally reached.
    Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}
2500
// Lowers an integer-compare instruction (scalar i1/i8/i16/i32/i64 or any
// integer vector type) to x86-32. Vectors are handled with SSE2 packed
// compares; scalar i64 is split into hi/lo 32-bit compares; scalar <=32-bit
// uses cmp + conditional branch around a constant store into Dest.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types, via sign
    // extension, so that the SSE2 packed-compare instructions (which
    // operate on 8/16/32-bit lanes) can be used.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      Variable *NewSrc0 = Func->makeVariable(NewTy);
      Variable *NewSrc1 = Func->makeVariable(NewTy);
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations. Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits (xor with the sign-bit
    // mask maps unsigned ordering onto signed ordering).
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    // SSE2 only provides "equal" and "greater than" packed compares, so
    // the remaining conditions are synthesized by swapping operands
    // and/or negating the result (pxor with all-ones).
    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
    } break;
    case InstIcmp::Ne: {
      // Src0RM == Src1RM, then invert.
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1RM > Src0RM)
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      // Src1RM > Src0RM
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0RM > Src1RM)
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }

    _movp(Dest, T);
    // The result lanes are already all-ones/all-zeros, so a following
    // sext of Dest (the common pattern for vector compares) is redundant
    // and can be removed.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
  // a physical register. (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // Try to fuse a compare immediately followed by a conditional branch. This
  // is possible when the compare dest and the branch source operands are the
  // same, and are their only uses. TODO: implement this optimization for i64.
  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
      NextBr->setDeleted();
      Operand *Src0RM =
          legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
      _cmp(Src0RM, Src1);
      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
          NextBr->getTargetFalse());
      // Skip over the following branch instruction.
      Context.advanceNext();
      return;
    }
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // eq/ne: assume the "default" answer, then branch to Label (keeping
      // the default) as soon as one 32-bit half differs; otherwise
      // overwrite Dest with the opposite value. _mov_nonkillable keeps
      // the second store from killing the first across the branch.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(Src0LoRM, Src1LoRI);
      _br(CondX86::Br_ne, Label);
      _cmp(Src0HiRM, Src1HiRI);
      _br(CondX86::Br_ne, Label);
      _mov_nonkillable(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // Ordered compares: decide on the high halves first (C1/C2 from the
      // per-condition TableIcmp64 entry), falling back to an unsigned
      // compare of the low halves (C3) when the high halves are equal.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(Src0HiRM, Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(Src0LoRM, Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      _mov_nonkillable(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }

  // cmp b, c
  Operand *Src0RM =
      legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0RM, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  _mov_nonkillable(Dest, Zero);
  Context.insert(Label);
}
2680
// Lowers insertelement. Strategy depends on the vector type and the
// available instruction set:
//  - pinsrw always (v8i16/v8i1), or insertps/pinsrb/pinsrd with >= SSE4.1;
//  - for 4 x 32-bit vectors without SSE4.1, a movss (index 0) or a pair of
//    shufps instructions;
//  - otherwise (v16i8/v16i1 without SSE4.1), spill to a stack slot and do
//    the insertion through memory.
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      // insertps encodes the destination lane in imm8 bits [5:4],
      // hence the shift by 4.
      _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = nullptr;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToVar(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      // movss merges the low lane of ElementR into T, which is exactly
      // an insert at index 0.
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // Element[0] is being inserted into SourceVectOperand. Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    // Mask entries are indexed by (Index - 1); the values encode the
    // shufps immediate bytes for the selections described above.
    const unsigned char Mask1[3] = {0, 192, 128};
    const unsigned char Mask2[3] = {227, 196, 52};

    Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
    Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    // Weight Zero keeps the slot out of a register, forcing a stack home.
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToVar(ElementToInsertNotLegalized), Loc);

    // Reload the whole modified vector into the destination.
    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}
2803
// Lowers a PNaCl intrinsic call by dispatching on the intrinsic ID.
// Atomic intrinsics validate their constant memory-order arguments first
// (reporting via Func->setError on violation); several intrinsics lower to
// helper-function calls (memcpy/memset/popcount/setjmp/...), the rest are
// expanded inline.
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    // Args: 0=ptr, 1=expected, 2=desired, 3=success order, 4=failure order.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
      return;
    }
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {
      Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = Instr->getArg(0);
    Operand *Expected = Instr->getArg(1);
    Operand *Desired = Instr->getArg(2);
    // Try the cmpxchg+icmp+br fusion first; fall back to the plain form.
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstX8632Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant
    // byte size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which
        // can make 16-byte operations lock free (when used with the LOCK
        // prefix). However, that's not supported in 32-bit mode, so just
        // return 0 even for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal loads are atomic.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (Dest->getType() == IceType_i64) {
      // Follow what GCC does and use a movq instead of what lowerLoad()
      // normally does (split the load into two). A movq through an XMM
      // register is a single atomic 64-bit access.
      // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
      // can't happen anyway, since this is x86-32 and integer arithmetic only
      // happens on 32-bit quantities.
      Variable *T = makeReg(IceType_f64);
      OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
      _movq(T, Addr);
      // Then cast the bits back out of the XMM register to the i64 Dest.
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
      lowerCast(Cast);
      // Make sure that the atomic load isn't elided when unused.
      Context.insert(InstFakeUse::create(Func, Dest->getLo()));
      Context.insert(InstFakeUse::create(Func, Dest->getHi()));
      return;
    }
    InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
    // insert the FakeUse on the last-inserted instruction's dest.
    Context.insert(
        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
    return;
  }
  case Intrinsics::AtomicRMW:
    // Args: 0=operation, 1=ptr, 2=value, 3=memory order.
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    lowerAtomicRMW(Instr->getDest(),
                   static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
                       Instr->getArg(0))->getValue()),
                   Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::VerifyMemoryOrder(
            llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal stores are atomic.
    // Add a fence after the store to make it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does
      // (split the store into two), following what GCC does.
      // Cast the bits from int -> to an xmm register first.
      Variable *T = makeReg(IceType_f64);
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    InstStore *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the
    // argument must be a register. Use rotate left for 16-bit bswap.
    if (Val->getType() == IceType_i64) {
      // Swap the bytes within each half, then swap the halves.
      Variable *T_Lo = legalizeToVar(loOperand(Val));
      Variable *T_Hi = legalizeToVar(hiOperand(Val));
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if (Val->getType() == IceType_i32) {
      Variable *T = legalizeToVar(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
      Val = legalize(Val);
      // rol T, 8 swaps the two bytes of a 16-bit value.
      Constant *Eight = Ctx->getConstantInt16(8);
      Variable *T = nullptr;
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    // Population count is lowered as a call to the compiler-rt helper,
    // selected by operand width.
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call =
        makeHelperCall(isInt32Asserting32Or64(Val->getType()) ? "__popcountsi2"
                                                              : "__popcountdi2",
                       Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zero'ing instruction is dead and gets optimized out.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (Val->getType() == IceType_i64) {
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    // Note: for cttz the hi/lo operand order is swapped relative to ctlz,
    // since trailing zeros are counted starting from the low half.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (Val->getType() == IceType_i64) {
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall("longjmp", nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall("memcpy", nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall("memmove", nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because the PNaCl ABI requires arguments to be at least 32 bits
    // wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall("memset", nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().UseSandboxing) {
      // Under sandboxing, the thread pointer lives at gs:[0]; read it
      // directly instead of making a helper call.
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Operand *Src =
          OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
                                  OperandX8632Mem::SegReg_GS);
      Variable *Dest = Instr->getDest();
      Variable *T = nullptr;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // Stacksave simply captures the current value of esp.
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    // _mov_nonkillable: this write to esp must not be treated as killing
    // the register's prior definition.
    _mov_nonkillable(esp, Instr->getArg(0));
    return;
  }
  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}
3119
// Lowers an atomic compare-and-exchange to a locked cmpxchg (or cmpxchg8b
// for i64). DestPrev receives the previous memory contents (equal to
// Expected iff the exchange succeeded).
void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
                                     Operand *Expected, Operand *Desired) {
  if (Expected->getType() == IceType_i64) {
    // cmpxchg8b requires its operands pinned: edx:eax = expected value,
    // ecx:ebx = desired value; edx:eax receives the old memory value.
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from FormMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
    // LOCK prefix makes the 8-byte compare-exchange atomic.
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // <= 32-bit case: cmpxchg requires the expected/old value in eax.
  Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
  _mov(T_eax, Expected);
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
  Variable *DesiredReg = legalizeToVar(Desired);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  // eax now holds the previous memory contents.
  _mov(DestPrev, T_eax);
}
3150
Jan Voungc820ddf2014-07-29 14:38:51 -07003151bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
3152 Operand *Expected,
3153 Operand *Desired) {
3154 if (Ctx->getOptLevel() == Opt_m1)
3155 return false;
3156 // Peek ahead a few instructions and see how Dest is used.
3157 // It's very common to have:
3158 //
3159 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3160 // [%y_phi = ...] // list of phi stores
3161 // %p = icmp eq i32 %x, %expected
3162 // br i1 %p, label %l1, label %l2
3163 //
3164 // which we can optimize into:
3165 //
3166 // %x = <cmpxchg code>
3167 // [%y_phi = ...] // list of phi stores
3168 // br eq, %l1, %l2
3169 InstList::iterator I = Context.getCur();
3170 // I is currently the InstIntrinsicCall. Peek past that.
3171 // This assumes that the atomic cmpxchg has not been lowered yet,
3172 // so that the instructions seen in the scan from "Cur" is simple.
3173 assert(llvm::isa<InstIntrinsicCall>(*I));
3174 Inst *NextInst = Context.getNextInst(I);
3175 if (!NextInst)
3176 return false;
3177 // There might be phi assignments right before the compare+branch, since this
3178 // could be a backward branch for a loop. This placement of assignments is
3179 // determined by placePhiStores().
3180 std::vector<InstAssign *> PhiAssigns;
3181 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3182 if (PhiAssign->getDest() == Dest)
3183 return false;
3184 PhiAssigns.push_back(PhiAssign);
3185 NextInst = Context.getNextInst(I);
3186 if (!NextInst)
3187 return false;
3188 }
3189 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3190 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3191 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3192 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3193 return false;
3194 }
3195 NextInst = Context.getNextInst(I);
3196 if (!NextInst)
3197 return false;
3198 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3199 if (!NextBr->isUnconditional() &&
3200 NextCmp->getDest() == NextBr->getCondition() &&
3201 NextBr->isLastUse(NextCmp->getDest())) {
3202 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3203 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3204 // Lower the phi assignments now, before the branch (same placement
3205 // as before).
3206 InstAssign *PhiAssign = PhiAssigns[i];
Jan Voungc820ddf2014-07-29 14:38:51 -07003207 PhiAssign->setDeleted();
Jim Stichnoth89d79562014-08-27 13:50:03 -07003208 lowerAssign(PhiAssign);
Jan Voungc820ddf2014-07-29 14:38:51 -07003209 Context.advanceNext();
3210 }
Jan Voungbd385e42014-09-18 18:18:10 -07003211 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
Jan Voungc820ddf2014-07-29 14:38:51 -07003212 // Skip over the old compare and branch, by deleting them.
3213 NextCmp->setDeleted();
3214 NextBr->setDeleted();
3215 Context.advanceNext();
3216 Context.advanceNext();
3217 return true;
3218 }
3219 }
3220 }
3221 return false;
3222}
3223
Jan Voung5cd240d2014-06-25 10:36:46 -07003224void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3225 Operand *Ptr, Operand *Val) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003226 bool NeedsCmpxchg = false;
Jim Stichnothae953202014-12-20 06:17:49 -08003227 LowerBinOp Op_Lo = nullptr;
3228 LowerBinOp Op_Hi = nullptr;
Jan Voung5cd240d2014-06-25 10:36:46 -07003229 switch (Operation) {
3230 default:
3231 Func->setError("Unknown AtomicRMW operation");
3232 return;
3233 case Intrinsics::AtomicAdd: {
3234 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003235 // All the fall-through paths must set this to true, but use this
3236 // for asserting.
3237 NeedsCmpxchg = true;
3238 Op_Lo = &TargetX8632::_add;
3239 Op_Hi = &TargetX8632::_adc;
3240 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003241 }
3242 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3243 const bool Locked = true;
Jim Stichnothae953202014-12-20 06:17:49 -08003244 Variable *T = nullptr;
Jan Voung5cd240d2014-06-25 10:36:46 -07003245 _mov(T, Val);
3246 _xadd(Addr, T, Locked);
3247 _mov(Dest, T);
3248 return;
3249 }
3250 case Intrinsics::AtomicSub: {
3251 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003252 NeedsCmpxchg = true;
3253 Op_Lo = &TargetX8632::_sub;
3254 Op_Hi = &TargetX8632::_sbb;
3255 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003256 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003257 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
3258 const bool Locked = true;
Jim Stichnothae953202014-12-20 06:17:49 -08003259 Variable *T = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003260 _mov(T, Val);
3261 _neg(T);
3262 _xadd(Addr, T, Locked);
3263 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003264 return;
3265 }
3266 case Intrinsics::AtomicOr:
Jan Vounga3a01a22014-07-14 10:32:41 -07003267 // TODO(jvoung): If Dest is null or dead, then some of these
3268 // operations do not need an "exchange", but just a locked op.
3269 // That appears to be "worth" it for sub, or, and, and xor.
3270 // xadd is probably fine vs lock add for add, and xchg is fine
3271 // vs an atomic store.
3272 NeedsCmpxchg = true;
3273 Op_Lo = &TargetX8632::_or;
3274 Op_Hi = &TargetX8632::_or;
3275 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003276 case Intrinsics::AtomicAnd:
Jan Vounga3a01a22014-07-14 10:32:41 -07003277 NeedsCmpxchg = true;
3278 Op_Lo = &TargetX8632::_and;
3279 Op_Hi = &TargetX8632::_and;
3280 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003281 case Intrinsics::AtomicXor:
Jan Vounga3a01a22014-07-14 10:32:41 -07003282 NeedsCmpxchg = true;
3283 Op_Lo = &TargetX8632::_xor;
3284 Op_Hi = &TargetX8632::_xor;
3285 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003286 case Intrinsics::AtomicExchange:
Jan Vounga3a01a22014-07-14 10:32:41 -07003287 if (Dest->getType() == IceType_i64) {
3288 NeedsCmpxchg = true;
3289 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3290 // just need to be moved to the ecx and ebx registers.
Jim Stichnothae953202014-12-20 06:17:49 -08003291 Op_Lo = nullptr;
3292 Op_Hi = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003293 break;
3294 }
3295 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
Jim Stichnothae953202014-12-20 06:17:49 -08003296 Variable *T = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003297 _mov(T, Val);
3298 _xchg(Addr, T);
3299 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003300 return;
3301 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003302 // Otherwise, we need a cmpxchg loop.
Jim Stichnoth6e992142014-07-30 14:45:20 -07003303 (void)NeedsCmpxchg;
Jan Vounga3a01a22014-07-14 10:32:41 -07003304 assert(NeedsCmpxchg);
3305 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3306}
3307
// Implements an atomic RMW as a compare-exchange retry loop: load the old
// value, compute the new value with Op_Lo/Op_Hi, and lock cmpxchg (or
// cmpxchg8b for 64-bit) until no other writer raced with us.  When both
// Op_Lo and Op_Hi are nullptr this implements a 64-bit exchange, where the
// desired value is simply loaded into ecx:ebx once, before the loop.
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_hi>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    // cmpxchg8b pins the old value to edx:eax and the new value to ecx:ebx.
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      // General RMW: recompute the new value from edx:eax on every retry,
      // so the loop body starts before the adjustment.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(CondX86::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base is also reused in the loop.
    Context.insert(InstFakeUse::create(Func, Addr->getBase()));
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 8/16/32-bit path: cmpxchg compares against (and reloads) eax on failure.
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(CondX86::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base is also reused in the loop.
  Context.insert(InstFakeUse::create(Func, Addr->getBase()));
  _mov(Dest, T_eax);
}
3408
Jan Vounge4da26f2014-07-15 17:52:39 -07003409// Lowers count {trailing, leading} zeros intrinsic.
3410//
3411// We could do constant folding here, but that should have
3412// been done by the front-end/middle-end optimizations.
3413void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3414 Operand *FirstVal, Operand *SecondVal) {
3415 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3416 // Then the instructions will handle the Val == 0 case much more simply
3417 // and won't require conversion from bit position to number of zeros.
3418 //
3419 // Otherwise:
3420 // bsr IF_NOT_ZERO, Val
3421 // mov T_DEST, 63
3422 // cmovne T_DEST, IF_NOT_ZERO
3423 // xor T_DEST, 31
3424 // mov DEST, T_DEST
3425 //
3426 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3427 // register. Also, bsf and bsr require their dest to be a register.
3428 //
3429 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3430 // E.g., for 000... 00001100, bsr will say that the most significant bit
3431 // set is at position 3, while the number of leading zeros is 28. Xor is
3432 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
3433 //
3434 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3435 // are all zero, and compute the result for that case (checking the lower
3436 // 32 bits). Then actually compute the result for the upper bits and
3437 // cmov in the result from the lower computation if the earlier speculation
3438 // was correct.
3439 //
3440 // Cttz, is similar, but uses bsf instead, and doesn't require the xor
3441 // bit position conversion, and the speculation is reversed.
3442 assert(Ty == IceType_i32 || Ty == IceType_i64);
3443 Variable *T = makeReg(IceType_i32);
Matt Wala53c5e602014-07-22 16:39:38 -07003444 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
Jan Vounge4da26f2014-07-15 17:52:39 -07003445 if (Cttz) {
Matt Wala53c5e602014-07-22 16:39:38 -07003446 _bsf(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003447 } else {
Matt Wala53c5e602014-07-22 16:39:38 -07003448 _bsr(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003449 }
3450 Variable *T_Dest = makeReg(IceType_i32);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003451 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3452 Constant *ThirtyOne = Ctx->getConstantInt32(31);
Jan Vounge4da26f2014-07-15 17:52:39 -07003453 if (Cttz) {
3454 _mov(T_Dest, ThirtyTwo);
3455 } else {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003456 Constant *SixtyThree = Ctx->getConstantInt32(63);
Jan Vounge4da26f2014-07-15 17:52:39 -07003457 _mov(T_Dest, SixtyThree);
3458 }
Jan Voungbd385e42014-09-18 18:18:10 -07003459 _cmov(T_Dest, T, CondX86::Br_ne);
Jan Vounge4da26f2014-07-15 17:52:39 -07003460 if (!Cttz) {
3461 _xor(T_Dest, ThirtyOne);
3462 }
3463 if (Ty == IceType_i32) {
3464 _mov(Dest, T_Dest);
3465 return;
3466 }
3467 _add(T_Dest, ThirtyTwo);
3468 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3469 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3470 // Will be using "test" on this, so we need a registerized variable.
3471 Variable *SecondVar = legalizeToVar(SecondVal);
3472 Variable *T_Dest2 = makeReg(IceType_i32);
3473 if (Cttz) {
3474 _bsf(T_Dest2, SecondVar);
3475 } else {
3476 _bsr(T_Dest2, SecondVar);
3477 _xor(T_Dest2, ThirtyOne);
3478 }
3479 _test(SecondVar, SecondVar);
Jan Voungbd385e42014-09-18 18:18:10 -07003480 _cmov(T_Dest2, T_Dest, CondX86::Br_e);
Jan Vounge4da26f2014-07-15 17:52:39 -07003481 _mov(DestLo, T_Dest2);
3482 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3483}
3484
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003485namespace {
3486
3487bool isAdd(const Inst *Inst) {
3488 if (const InstArithmetic *Arith =
3489 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3490 return (Arith->getOp() == InstArithmetic::Add);
3491 }
3492 return false;
3493}
3494
Jim Stichnoth89d79562014-08-27 13:50:03 -07003495void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3496 const Variable *Index, uint16_t Shift, int32_t Offset,
3497 const Inst *Reason) {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08003498 if (!ALLOW_DUMP)
3499 return;
Jim Stichnothfa4efea2015-01-27 05:06:03 -08003500 if (!Func->isVerbose(IceV_AddrOpt))
Jim Stichnoth89d79562014-08-27 13:50:03 -07003501 return;
Jim Stichnothe4a8f402015-01-20 12:52:51 -08003502 OstreamLocker L(Func->getContext());
Jim Stichnoth89d79562014-08-27 13:50:03 -07003503 Ostream &Str = Func->getContext()->getStrDump();
3504 Str << "Instruction: ";
3505 Reason->dumpDecorated(Func);
3506 Str << " results in Base=";
3507 if (Base)
3508 Base->dump(Func);
3509 else
3510 Str << "<null>";
3511 Str << ", Index=";
3512 if (Index)
3513 Index->dump(Func);
3514 else
3515 Str << "<null>";
3516 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3517}
3518
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003519bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
3520 const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003521 // Var originates from Var=SrcVar ==>
3522 // set Var:=SrcVar
Jim Stichnothae953202014-12-20 06:17:49 -08003523 if (Var == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003524 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003525 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3526 assert(!VMetadata->isMultiDef(Var));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003527 if (llvm::isa<InstAssign>(VarAssign)) {
3528 Operand *SrcOp = VarAssign->getSrc(0);
3529 assert(SrcOp);
3530 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003531 if (!VMetadata->isMultiDef(SrcVar) &&
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003532 // TODO: ensure SrcVar stays single-BB
3533 true) {
3534 Var = SrcVar;
3535 Reason = VarAssign;
3536 return true;
3537 }
3538 }
3539 }
3540 }
3541 return false;
3542}
3543
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003544bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
3545 Variable *&Index, uint16_t &Shift,
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003546 const Inst *&Reason) {
Jim Stichnothae953202014-12-20 06:17:49 -08003547 // Index==nullptr && Base is Base=Var1+Var2 ==>
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003548 // set Base=Var1, Index=Var2, Shift=0
Jim Stichnothae953202014-12-20 06:17:49 -08003549 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003550 return false;
Jim Stichnothae953202014-12-20 06:17:49 -08003551 if (Index != nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003552 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003553 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08003554 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003555 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003556 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003557 if (BaseInst->getSrcSize() < 2)
3558 return false;
3559 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003560 if (VMetadata->isMultiDef(Var1))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003561 return false;
3562 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003563 if (VMetadata->isMultiDef(Var2))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003564 return false;
3565 if (isAdd(BaseInst) &&
3566 // TODO: ensure Var1 and Var2 stay single-BB
3567 true) {
3568 Base = Var1;
3569 Index = Var2;
3570 Shift = 0; // should already have been 0
3571 Reason = BaseInst;
3572 return true;
3573 }
3574 }
3575 }
3576 return false;
3577}
3578
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003579bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
3580 uint16_t &Shift, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003581 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3582 // Index=Var, Shift+=log2(Const)
Jim Stichnothae953202014-12-20 06:17:49 -08003583 if (Index == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003584 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003585 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
Jim Stichnothae953202014-12-20 06:17:49 -08003586 if (IndexInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003587 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003588 assert(!VMetadata->isMultiDef(Index));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003589 if (IndexInst->getSrcSize() < 2)
3590 return false;
3591 if (const InstArithmetic *ArithInst =
3592 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3593 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jan Voungbc004632014-09-16 15:09:10 -07003594 if (ConstantInteger32 *Const =
3595 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003596 if (ArithInst->getOp() == InstArithmetic::Mul &&
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003597 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003598 uint64_t Mult = Const->getValue();
3599 uint32_t LogMult;
3600 switch (Mult) {
3601 case 1:
3602 LogMult = 0;
3603 break;
3604 case 2:
3605 LogMult = 1;
3606 break;
3607 case 4:
3608 LogMult = 2;
3609 break;
3610 case 8:
3611 LogMult = 3;
3612 break;
3613 default:
3614 return false;
3615 }
3616 if (Shift + LogMult <= 3) {
3617 Index = Var;
3618 Shift += LogMult;
3619 Reason = IndexInst;
3620 return true;
3621 }
3622 }
3623 }
3624 }
3625 }
3626 return false;
3627}
3628
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003629bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
3630 int32_t &Offset, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003631 // Base is Base=Var+Const || Base is Base=Const+Var ==>
3632 // set Base=Var, Offset+=Const
3633 // Base is Base=Var-Const ==>
3634 // set Base=Var, Offset-=Const
Jim Stichnothae953202014-12-20 06:17:49 -08003635 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003636 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003637 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08003638 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003639 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003640 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003641 if (const InstArithmetic *ArithInst =
3642 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3643 if (ArithInst->getOp() != InstArithmetic::Add &&
3644 ArithInst->getOp() != InstArithmetic::Sub)
3645 return false;
3646 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
Jim Stichnothae953202014-12-20 06:17:49 -08003647 Variable *Var = nullptr;
3648 ConstantInteger32 *Const = nullptr;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003649 if (Variable *VariableOperand =
3650 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3651 Var = VariableOperand;
Jan Voungbc004632014-09-16 15:09:10 -07003652 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003653 } else if (IsAdd) {
Jan Voungbc004632014-09-16 15:09:10 -07003654 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003655 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3656 }
Jim Stichnothae953202014-12-20 06:17:49 -08003657 if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003658 return false;
Jan Voungbc004632014-09-16 15:09:10 -07003659 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
Jan Voung8acded02014-09-22 18:02:25 -07003660 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
Jan Voungbc004632014-09-16 15:09:10 -07003661 return false;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003662 Base = Var;
Jan Voungbc004632014-09-16 15:09:10 -07003663 Offset += MoreOffset;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003664 Reason = BaseInst;
3665 return true;
3666 }
3667 return false;
3668}
3669
Jim Stichnoth89d79562014-08-27 13:50:03 -07003670void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
3671 Variable *&Index, uint16_t &Shift, int32_t &Offset) {
Jim Stichnoth800dab22014-09-20 12:25:02 -07003672 Func->resetCurrentNode();
Jim Stichnothfa4efea2015-01-27 05:06:03 -08003673 if (Func->isVerbose(IceV_AddrOpt)) {
Jim Stichnothe4a8f402015-01-20 12:52:51 -08003674 OstreamLocker L(Func->getContext());
Jim Stichnoth89d79562014-08-27 13:50:03 -07003675 Ostream &Str = Func->getContext()->getStrDump();
3676 Str << "\nStarting computeAddressOpt for instruction:\n ";
3677 Instr->dumpDecorated(Func);
3678 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003679 (void)Offset; // TODO: pattern-match for non-zero offsets.
Jim Stichnothae953202014-12-20 06:17:49 -08003680 if (Base == nullptr)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003681 return;
3682 // If the Base has more than one use or is live across multiple
3683 // blocks, then don't go further. Alternatively (?), never consider
3684 // a transformation that would change a variable that is currently
3685 // *not* live across basic block boundaries into one that *is*.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003686 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003687 return;
3688
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003689 const VariablesMetadata *VMetadata = Func->getVMetadata();
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003690 bool Continue = true;
3691 while (Continue) {
Jim Stichnothae953202014-12-20 06:17:49 -08003692 const Inst *Reason = nullptr;
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003693 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
3694 matchTransitiveAssign(VMetadata, Index, Reason) ||
3695 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
3696 matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
3697 matchOffsetBase(VMetadata, Base, Offset, Reason)) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003698 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
3699 } else {
3700 Continue = false;
Matt Wala8835b892014-08-11 17:46:58 -07003701 }
3702
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003703 // Index is Index=Var<<Const && Const+Shift<=3 ==>
3704 // Index=Var, Shift+=Const
3705
3706 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
3707 // Index=Var, Shift+=log2(Const)
3708
3709 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
3710 // swap(Index,Base)
3711 // Similar for Base=Const*Var and Base=Var<<Const
3712
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003713 // Index is Index=Var+Const ==>
3714 // set Index=Var, Offset+=(Const<<Shift)
3715
3716 // Index is Index=Const+Var ==>
3717 // set Index=Var, Offset+=(Const<<Shift)
3718
3719 // Index is Index=Var-Const ==>
3720 // set Index=Var, Offset-=(Const<<Shift)
3721
3722 // TODO: consider overflow issues with respect to Offset.
3723 // TODO: handle symbolic constants.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003724 }
3725}
3726
3727} // anonymous namespace
3728
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003729void TargetX8632::lowerLoad(const InstLoad *Inst) {
3730 // A Load instruction can be treated the same as an Assign
3731 // instruction, after the source operand is transformed into an
3732 // OperandX8632Mem operand. Note that the address mode
3733 // optimization already creates an OperandX8632Mem operand, so it
3734 // doesn't need another level of transformation.
3735 Type Ty = Inst->getDest()->getType();
Jan Voung5cd240d2014-06-25 10:36:46 -07003736 Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003737
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003738 // Fuse this load with a subsequent Arithmetic instruction in the
3739 // following situations:
3740 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
3741 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
3742 //
3743 // TODO: Clean up and test thoroughly.
Jan Voung5cd240d2014-06-25 10:36:46 -07003744 // (E.g., if there is an mfence-all make sure the load ends up on the
3745 // same side of the fence).
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003746 //
3747 // TODO: Why limit to Arithmetic instructions? This could probably be
3748 // applied to most any instruction type. Look at all source operands
3749 // in the following instruction, and if there is one instance of the
3750 // load instruction's dest variable, and that instruction ends that
3751 // variable's live range, then make the substitution. Deal with
3752 // commutativity optimization in the arithmetic instruction lowering.
Jim Stichnothae953202014-12-20 06:17:49 -08003753 InstArithmetic *NewArith = nullptr;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003754 if (InstArithmetic *Arith =
3755 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
3756 Variable *DestLoad = Inst->getDest();
3757 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
3758 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
3759 if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
3760 DestLoad != Src0Arith) {
3761 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
3762 Arith->getSrc(0), Src0);
3763 } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
3764 Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
3765 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
3766 Arith->getSrc(1), Src0);
3767 }
3768 if (NewArith) {
3769 Arith->setDeleted();
3770 Context.advanceNext();
3771 lowerArithmetic(NewArith);
3772 return;
3773 }
3774 }
3775
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003776 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
3777 lowerAssign(Assign);
3778}
3779
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003780void TargetX8632::doAddressOptLoad() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08003781 Inst *Inst = Context.getCur();
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003782 Variable *Dest = Inst->getDest();
3783 Operand *Addr = Inst->getSrc(0);
Jim Stichnothae953202014-12-20 06:17:49 -08003784 Variable *Index = nullptr;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003785 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003786 int32_t Offset = 0; // TODO: make Constant
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003787 // Vanilla ICE load instructions should not use the segment registers,
3788 // and computeAddressOpt only works at the level of Variables and Constants,
3789 // not other OperandX8632Mem, so there should be no mention of segment
3790 // registers there either.
3791 const OperandX8632Mem::SegmentRegisters SegmentReg =
3792 OperandX8632Mem::DefaultSegment;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003793 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jim Stichnoth89d79562014-08-27 13:50:03 -07003794 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003795 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07003796 Inst->setDeleted();
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003797 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003798 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003799 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003800 Context.insert(InstLoad::create(Func, Dest, Addr));
3801 }
3802}
3803
Matt Walac3302742014-08-15 16:21:56 -07003804void TargetX8632::randomlyInsertNop(float Probability) {
3805 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3806 if (RNG.getTrueWithProbability(Probability)) {
Jim Stichnothe6d24782014-12-19 05:42:24 -08003807 _nop(RNG(X86_NUM_NOP_VARIANTS));
Matt Walac3302742014-08-15 16:21:56 -07003808 }
3809}
3810
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003811void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
3812 Func->setError("Phi found in regular instruction list");
3813}
3814
// Lowers a ret instruction, moving the return value (if any) into the
// register(s) dictated by the x86-32 calling convention before emitting
// the actual ret.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      // i64 is returned in the edx:eax pair; the fake use keeps edx alive
      // up to the ret.
      Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      // Scalar float/double return values go on the x87 stack via fld.
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      // Vector return values go in xmm0.
      Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
    } else {
      // All remaining (integer) types are returned in eax.
      _mov(Reg, Src0, RegX8632::Reg_eax);
    }
  }
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
3841
// Lowers a select (conditional move).  Vector selects use SSE4.1
// blend instructions when available, or a sign-extended mask with
// pand/pandn/por otherwise.  Scalar selects are lowered to a
// compare-and-branch sequence.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(Dest->getType())) {
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
    if (InstructionSet >= SSE4_1) {
      // TODO(wala): If the condition operand is a constant, use blendps
      // or pblendw.
      //
      // Use blendvps or pblendvb to implement select.
      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
          SrcTy == IceType_v4f32) {
        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
        // blendvps implicitly uses xmm0 as the blend mask and selects
        // on the sign bit of each 32-bit mask element, so shift each
        // condition lane's low bit into the sign-bit position.
        Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
        _movp(xmm0, ConditionRM);
        _psll(xmm0, Ctx->getConstantInt8(31));
        _movp(T, SrcFRM);
        _blendvps(T, SrcTRM, xmm0);
        _movp(Dest, T);
      } else {
        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
        // For 8- and 16-element vectors, sign-extend the condition
        // into a full byte/word mask in xmm0, then use pblendvb
        // (which selects on the high bit of each mask byte).
        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                              : IceType_v16i8;
        Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
        _movp(T, SrcFRM);
        _pblendvb(T, SrcTRM, xmm0);
        _movp(Dest, T);
      }
      return;
    }
    // Lower select without SSE4.1:
    // a=d?b:c ==>
    //   if elementtype(d) != i1:
    //      d=sext(d);
    //   a=(b&d)|(c&~d);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      // The condition is already a full-width element mask.
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      _movp(T, ConditionRM);
    }
    // T = mask & SrcT;  T2 = ~mask & SrcF;  Dest = T | T2.
    _movp(T2, T);
    _pand(T, SrcTRM);
    _pandn(T2, SrcFRM);
    _por(T, T2);
    _movp(Dest, T);

    return;
  }

  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // Lower the i64 select as two 32-bit moves, with non-killable
    // moves for the false-path assignment so liveness stays correct.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
    _cmp(ConditionRM, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(CondX86::Br_ne, Label);
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
    _mov_nonkillable(DestLo, SrcLoRI);
    _mov_nonkillable(DestHi, SrcHiRI);
  } else {
    _cmp(ConditionRM, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(CondX86::Br_ne, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    _mov_nonkillable(Dest, SrcF);
  }

  Context.insert(Label);
}
3937
3938void TargetX8632::lowerStore(const InstStore *Inst) {
3939 Operand *Value = Inst->getData();
3940 Operand *Addr = Inst->getAddr();
Jan Voung5cd240d2014-06-25 10:36:46 -07003941 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
Matt Wala105b7042014-08-11 19:56:19 -07003942 Type Ty = NewAddr->getType();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003943
Matt Wala105b7042014-08-11 19:56:19 -07003944 if (Ty == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003945 Value = legalize(Value);
Jim Stichnothad403532014-09-25 12:44:17 -07003946 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
3947 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003948 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
3949 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
Matt Wala105b7042014-08-11 19:56:19 -07003950 } else if (isVectorType(Ty)) {
3951 _storep(legalizeToVar(Value), NewAddr);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003952 } else {
Jim Stichnothad403532014-09-25 12:44:17 -07003953 Value = legalize(Value, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003954 _store(Value, NewAddr);
3955 }
3956}
3957
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003958void TargetX8632::doAddressOptStore() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08003959 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003960 Operand *Data = Inst->getData();
3961 Operand *Addr = Inst->getAddr();
Jim Stichnothae953202014-12-20 06:17:49 -08003962 Variable *Index = nullptr;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003963 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003964 int32_t Offset = 0; // TODO: make Constant
3965 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003966 // Vanilla ICE store instructions should not use the segment registers,
3967 // and computeAddressOpt only works at the level of Variables and Constants,
3968 // not other OperandX8632Mem, so there should be no mention of segment
3969 // registers there either.
3970 const OperandX8632Mem::SegmentRegisters SegmentReg =
3971 OperandX8632Mem::DefaultSegment;
Jim Stichnoth89d79562014-08-27 13:50:03 -07003972 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003973 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07003974 Inst->setDeleted();
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003975 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003976 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003977 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003978 Context.insert(InstStore::create(Func, Data, Addr));
3979 }
3980}
3981
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003982void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
3983 // This implements the most naive possible lowering.
3984 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
3985 Operand *Src0 = Inst->getComparison();
3986 SizeT NumCases = Inst->getNumCases();
Jim Stichnoth2daadb72014-11-03 19:57:24 -08003987 if (Src0->getType() == IceType_i64) {
3988 Src0 = legalize(Src0); // get Base/Index into physical registers
3989 Operand *Src0Lo = loOperand(Src0);
3990 Operand *Src0Hi = hiOperand(Src0);
3991 if (NumCases >= 2) {
3992 Src0Lo = legalizeToVar(Src0Lo);
3993 Src0Hi = legalizeToVar(Src0Hi);
3994 } else {
3995 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
3996 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
3997 }
3998 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003999 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4000 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004001 InstX8632Label *Label = InstX8632Label::create(Func, this);
4002 _cmp(Src0Lo, ValueLo);
4003 _br(CondX86::Br_ne, Label);
4004 _cmp(Src0Hi, ValueHi);
4005 _br(CondX86::Br_e, Inst->getLabel(I));
4006 Context.insert(Label);
4007 }
4008 _br(Inst->getLabelDefault());
4009 return;
4010 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004011 // OK, we'll be slightly less naive by forcing Src into a physical
4012 // register if there are 2 or more uses.
4013 if (NumCases >= 2)
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004014 Src0 = legalizeToVar(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004015 else
Jim Stichnothad403532014-09-25 12:44:17 -07004016 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004017 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004018 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004019 _cmp(Src0, Value);
Jan Voungbd385e42014-09-18 18:18:10 -07004020 _br(CondX86::Br_e, Inst->getLabel(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004021 }
4022
4023 _br(Inst->getLabelDefault());
4024}
4025
Matt Walaafeaee42014-08-07 13:47:30 -07004026void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4027 Variable *Dest, Operand *Src0,
4028 Operand *Src1) {
4029 assert(isVectorType(Dest->getType()));
4030 Type Ty = Dest->getType();
4031 Type ElementTy = typeElementType(Ty);
4032 SizeT NumElements = typeNumElements(Ty);
4033
4034 Operand *T = Ctx->getConstantUndef(Ty);
4035 for (SizeT I = 0; I < NumElements; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004036 Constant *Index = Ctx->getConstantInt32(I);
Matt Walaafeaee42014-08-07 13:47:30 -07004037
4038 // Extract the next two inputs.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004039 Variable *Op0 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004040 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004041 Variable *Op1 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004042 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4043
4044 // Perform the arithmetic as a scalar operation.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004045 Variable *Res = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004046 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4047
4048 // Insert the result into position.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004049 Variable *DestT = Func->makeVariable(Ty);
Matt Walaafeaee42014-08-07 13:47:30 -07004050 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4051 T = DestT;
Matt Walaafeaee42014-08-07 13:47:30 -07004052 }
4053
4054 lowerAssign(InstAssign::create(Func, Dest, T));
4055}
4056
Matt Walace0ca8f2014-07-24 12:34:20 -07004057// The following pattern occurs often in lowered C and C++ code:
4058//
4059// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4060// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4061//
4062// We can eliminate the sext operation by copying the result of pcmpeqd,
4063// pcmpgtd, or cmpps (which produce sign extended results) to the result
4064// of the sext operation.
4065void
4066TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
4067 if (InstCast *NextCast =
4068 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4069 if (NextCast->getCastKind() == InstCast::Sext &&
4070 NextCast->getSrc(0) == SignExtendedResult) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004071 NextCast->setDeleted();
Matt Walace0ca8f2014-07-24 12:34:20 -07004072 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4073 // Skip over the instruction.
Matt Walace0ca8f2014-07-24 12:34:20 -07004074 Context.advanceNext();
4075 }
4076 }
4077}
4078
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004079void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
4080 const SizeT MaxSrcs = 0;
Jim Stichnothae953202014-12-20 06:17:49 -08004081 Variable *Dest = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004082 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
4083 lowerCall(Call);
4084}
4085
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004086// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4087// preserve integrity of liveness analysis. Undef values are also
4088// turned into zeroes, since loOperand() and hiOperand() don't expect
4089// Undef input.
void TargetX8632::prelowerPhis() {
  CfgNode *Node = Context.getNode();
  for (Inst &I : Node->getPhis()) {
    auto Phi = llvm::dyn_cast<InstPhi>(&I);
    if (Phi->isDeleted())
      continue;
    Variable *Dest = Phi->getDest();
    if (Dest->getType() == IceType_i64) {
      // Split the i64 phi into two i32 phis over the lo and hi halves
      // of the dest, mirroring each incoming (value, predecessor) pair.
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
      InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
      for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
        Operand *Src = Phi->getSrc(I);
        CfgNode *Label = Phi->getLabel(I);
        // loOperand()/hiOperand() don't accept Undef, so replace an
        // undef incoming value with zero first.
        if (llvm::isa<ConstantUndef>(Src))
          Src = Ctx->getConstantZero(Dest->getType());
        PhiLo->addArgument(loOperand(Src), Label);
        PhiHi->addArgument(hiOperand(Src), Label);
      }
      // NOTE(review): the new phis are appended to the list currently
      // being iterated; if the loop reaches them, they are skipped
      // because their dests are i32, not i64 — confirm the phi list's
      // iterators tolerate push_back during iteration.
      Node->getPhis().push_back(PhiLo);
      Node->getPhis().push_back(PhiHi);
      // The original wide phi is dead once the split phis exist.
      Phi->setDeleted();
    }
  }
}
4116
4117namespace {
4118
4119bool isMemoryOperand(const Operand *Opnd) {
4120 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4121 return !Var->hasReg();
4122 if (llvm::isa<Constant>(Opnd))
4123 return isScalarFloatingType(Opnd->getType());
4124 return true;
4125}
4126
4127} // end of anonymous namespace
4128
4129// Lower the pre-ordered list of assignments into mov instructions.
4130// Also has to do some ad-hoc register allocation as necessary.
void TargetX8632::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  // Check that this is a properly initialized shell of a node.
  assert(Node->getOutEdges().size() == 1);
  assert(Node->getInsts().empty());
  assert(Node->getPhis().empty());
  CfgNode *Succ = Node->getOutEdges().front();
  getContext().init(Node);
  // Register set setup similar to regAlloc().
  RegSetMask RegInclude = RegSet_All;
  RegSetMask RegExclude = RegSet_StackPointer;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
  bool NeedsRegs = false;
  // Initialize the set of available registers to the set of what is
  // available (not live) at the beginning of the successor block,
  // minus all registers used as Dest operands in the Assignments.  To
  // do this, we start off assuming all registers are available, then
  // iterate through the Assignments and remove Dest registers.
  // During this iteration, we also determine whether we will actually
  // need any extra registers for memory-to-memory copies.  If so, we
  // do the actual work of removing the live-in registers from the
  // set.  TODO(stichnot): This work is being repeated for every split
  // edge to the successor, so consider updating LiveIn just once
  // after all the edges are split.
  for (const Inst &I : Assignments) {
    Variable *Dest = I.getDest();
    if (Dest->hasReg()) {
      Available[Dest->getRegNum()] = false;
    } else if (isMemoryOperand(I.getSrc(0))) {
      NeedsRegs = true; // Src and Dest are both in memory
    }
  }
  if (NeedsRegs) {
    // Remove all live-in registers of the successor from Available.
    LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
    for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
      Variable *Var = Func->getLiveness()->getVariable(i, Succ);
      if (Var->hasReg())
        Available[Var->getRegNum()] = false;
    }
  }
  // Iterate backwards through the Assignments.  After lowering each
  // assignment, add Dest to the set of available registers, and
  // remove Src from the set of available registers.  Iteration is
  // done backwards to enable incremental updates of the available
  // register set, and the lowered instruction numbers may be out of
  // order, but that can be worked around by renumbering the block
  // afterwards if necessary.
  for (const Inst &I : reverse_range(Assignments)) {
    Context.rewind();
    auto Assign = llvm::dyn_cast<InstAssign>(&I);
    Variable *Dest = Assign->getDest();
    Operand *Src = Assign->getSrc(0);
    Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
    // Use normal assignment lowering, except lower mem=mem specially
    // so we can register-allocate at the same time.
    if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
      lowerAssign(Assign);
    } else {
      assert(Dest->getType() == Src->getType());
      const llvm::SmallBitVector &RegsForType =
          getRegisterSetForType(Dest->getType());
      llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
      Variable *SpillLoc = nullptr;
      Variable *Preg = nullptr;
      // TODO(stichnot): Opportunity for register randomization.
      int32_t RegNum = AvailRegsForType.find_first();
      bool IsVector = isVectorType(Dest->getType());
      bool NeedSpill = (RegNum == -1);
      if (NeedSpill) {
        // No register of the right type is free: pick some register
        // to spill and update RegNum.
        // TODO(stichnot): Opportunity for register randomization.
        RegNum = RegsForType.find_first();
        Preg = getPhysicalRegister(RegNum, Dest->getType());
        SpillLoc = Func->makeVariable(Dest->getType());
        // Save the scavenged register's current contents.
        if (IsVector)
          _movp(SpillLoc, Preg);
        else
          _mov(SpillLoc, Preg);
      }
      assert(RegNum >= 0);
      if (llvm::isa<ConstantUndef>(Src))
        // Materialize an actual constant instead of undef.  RegNum is
        // passed in for vector types because undef vectors are
        // lowered to vector register of zeroes.
        Src =
            legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
      // Copy Src -> Tmp (register) -> Dest to implement mem=mem.
      Variable *Tmp = makeReg(Dest->getType(), RegNum);
      if (IsVector) {
        _movp(Tmp, Src);
        _movp(Dest, Tmp);
      } else {
        _mov(Tmp, Src);
        _mov(Dest, Tmp);
      }
      if (NeedSpill) {
        // Restore the spilled register.
        if (IsVector)
          _movp(Preg, SpillLoc);
        else
          _mov(Preg, SpillLoc);
      }
    }
    // Update register availability before moving to the previous
    // instruction on the Assignments list.
    if (Dest->hasReg())
      Available[Dest->getRegNum()] = true;
    if (SrcVar && SrcVar->hasReg())
      Available[SrcVar->getRegNum()] = false;
  }

  // Add the terminator branch instruction to the end.
  Context.setInsertPoint(Context.getEnd());
  _br(Succ);
}
4247
Matt Wala9a0168a2014-07-23 14:56:10 -07004248// There is no support for loading or emitting vector constants, so the
4249// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4250// etc. are initialized with register operations.
4251//
4252// TODO(wala): Add limited support for vector constants so that
4253// complex initialization in registers is unnecessary.
4254
Matt Wala83b80362014-07-16 10:21:30 -07004255Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004256 Variable *Reg = makeReg(Ty, RegNum);
4257 // Insert a FakeDef, since otherwise the live range of Reg might
4258 // be overestimated.
4259 Context.insert(InstFakeDef::create(Func, Reg));
4260 _pxor(Reg, Reg);
4261 return Reg;
4262}
4263
Matt Wala9a0168a2014-07-23 14:56:10 -07004264Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
4265 Variable *MinusOnes = makeReg(Ty, RegNum);
4266 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
4267 Context.insert(InstFakeDef::create(Func, MinusOnes));
4268 _pcmpeq(MinusOnes, MinusOnes);
4269 return MinusOnes;
4270}
4271
Matt Wala83b80362014-07-16 10:21:30 -07004272Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004273 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07004274 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07004275 _psub(Dest, MinusOne);
4276 return Dest;
4277}
4278
Matt Wala9a0168a2014-07-23 14:56:10 -07004279Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4280 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4281 Ty == IceType_v16i8);
4282 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4283 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4284 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004285 _psll(Reg, Ctx->getConstantInt8(Shift));
Matt Wala9a0168a2014-07-23 14:56:10 -07004286 return Reg;
4287 } else {
4288 // SSE has no left shift operation for vectors of 8 bit integers.
4289 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004290 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
Matt Wala9a0168a2014-07-23 14:56:10 -07004291 Variable *Reg = makeReg(Ty, RegNum);
4292 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4293 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4294 return Reg;
4295 }
4296}
4297
Matt Wala49889232014-07-18 12:45:09 -07004298OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4299 Variable *Slot,
4300 uint32_t Offset) {
4301 // Ensure that Loc is a stack slot.
4302 assert(Slot->getWeight() == RegWeight::Zero);
4303 assert(Slot->getRegNum() == Variable::NoRegister);
4304 // Compute the location of Loc in memory.
4305 // TODO(wala,stichnot): lea should not be required. The address of
4306 // the stack slot is known at compile time (although not until after
4307 // addProlog()).
4308 const Type PointerType = IceType_i32;
4309 Variable *Loc = makeReg(PointerType);
4310 _lea(Loc, Slot);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004311 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
Matt Wala49889232014-07-18 12:45:09 -07004312 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4313}
4314
Matt Wala928f1292014-07-07 16:50:46 -07004315// Helper for legalize() to emit the right code to lower an operand to a
4316// register of the appropriate type.
4317Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4318 Type Ty = Src->getType();
4319 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07004320 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07004321 _movp(Reg, Src);
4322 } else {
4323 _mov(Reg, Src);
4324 }
4325 return Reg;
4326}
4327
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004328Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
Jim Stichnothad403532014-09-25 12:44:17 -07004329 int32_t RegNum) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004330 // Assert that a physical register is allowed. To date, all calls
4331 // to legalize() allow a physical register. If a physical register
4332 // needs to be explicitly disallowed, then new code will need to be
4333 // written to force a spill.
4334 assert(Allowed & Legal_Reg);
4335 // If we're asking for a specific physical register, make sure we're
4336 // not allowing any other operand kinds. (This could be future
4337 // work, e.g. allow the shl shift amount to be either an immediate
4338 // or in ecx.)
4339 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
4340 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
4341 // Before doing anything with a Mem operand, we need to ensure
4342 // that the Base and Index components are in physical registers.
4343 Variable *Base = Mem->getBase();
4344 Variable *Index = Mem->getIndex();
Jim Stichnothae953202014-12-20 06:17:49 -08004345 Variable *RegBase = nullptr;
4346 Variable *RegIndex = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004347 if (Base) {
Jim Stichnothad403532014-09-25 12:44:17 -07004348 RegBase = legalizeToVar(Base);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004349 }
4350 if (Index) {
Jim Stichnothad403532014-09-25 12:44:17 -07004351 RegIndex = legalizeToVar(Index);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004352 }
4353 if (Base != RegBase || Index != RegIndex) {
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004354 From = OperandX8632Mem::create(
4355 Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
4356 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004357 }
4358
4359 if (!(Allowed & Legal_Mem)) {
Matt Wala928f1292014-07-07 16:50:46 -07004360 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004361 }
4362 return From;
4363 }
4364 if (llvm::isa<Constant>(From)) {
Matt Walad8f4a7d2014-06-18 09:55:03 -07004365 if (llvm::isa<ConstantUndef>(From)) {
4366 // Lower undefs to zero. Another option is to lower undefs to an
4367 // uninitialized register; however, using an uninitialized register
4368 // results in less predictable code.
4369 //
4370 // If in the future the implementation is changed to lower undef
4371 // values to uninitialized registers, a FakeDef will be needed:
4372 // Context.insert(InstFakeDef::create(Func, Reg));
4373 // This is in order to ensure that the live range of Reg is not
4374 // overestimated. If the constant being lowered is a 64 bit value,
4375 // then the result should be split and the lo and hi components will
4376 // need to go in uninitialized registers.
Matt Wala83b80362014-07-16 10:21:30 -07004377 if (isVectorType(From->getType()))
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004378 return makeVectorOfZeros(From->getType(), RegNum);
Matt Wala83b80362014-07-16 10:21:30 -07004379 From = Ctx->getConstantZero(From->getType());
Matt Walad8f4a7d2014-06-18 09:55:03 -07004380 }
Matt Walaad8f7262014-07-14 17:37:37 -07004381 // There should be no constants of vector type (other than undef).
4382 assert(!isVectorType(From->getType()));
Jim Stichnothde4ca712014-06-29 08:13:48 -07004383 bool NeedsReg = false;
4384 if (!(Allowed & Legal_Imm))
4385 // Immediate specifically not allowed
4386 NeedsReg = true;
Jan Voung3a569182014-09-29 10:16:01 -07004387 if (!(Allowed & Legal_Mem) && isScalarFloatingType(From->getType()))
Jim Stichnothde4ca712014-06-29 08:13:48 -07004388 // On x86, FP constants are lowered to mem operands.
4389 NeedsReg = true;
Jan Voung5a13f452014-06-18 10:42:02 -07004390 if (NeedsReg) {
Matt Wala928f1292014-07-07 16:50:46 -07004391 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004392 }
4393 return From;
4394 }
4395 if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
Matt Walaad8f7262014-07-14 17:37:37 -07004396 // Check if the variable is guaranteed a physical register. This
4397 // can happen either when the variable is pre-colored or when it is
4398 // assigned infinite weight.
4399 bool MustHaveRegister =
4400 (Var->hasReg() || Var->getWeight() == RegWeight::Inf);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004401 // We need a new physical register for the operand if:
Jim Stichnothca662e92014-07-10 15:32:36 -07004402 // Mem is not allowed and Var isn't guaranteed a physical
4403 // register, or
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004404 // RegNum is required and Var->getRegNum() doesn't match.
Matt Walaad8f7262014-07-14 17:37:37 -07004405 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004406 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
Jim Stichnothad403532014-09-25 12:44:17 -07004407 From = copyToReg(From, RegNum);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004408 }
4409 return From;
4410 }
4411 llvm_unreachable("Unhandled operand kind in legalize()");
4412 return From;
4413}
4414
// Provide a trivial wrapper to legalize() for this common usage.
// Legal_Reg alone guarantees the result is a register-allocated
// Variable, so the cast is safe.
Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}
4419
Jan Voung5cd240d2014-06-25 10:36:46 -07004420OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
4421 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4422 // It may be the case that address mode optimization already creates
4423 // an OperandX8632Mem, so in that case it wouldn't need another level
4424 // of transformation.
4425 if (!Mem) {
4426 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4427 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4428 assert(Base || Offset);
Matt Walae3777672014-07-31 09:06:17 -07004429 if (Offset) {
Jan Voungbc004632014-09-16 15:09:10 -07004430 assert(llvm::isa<ConstantInteger32>(Offset) ||
Matt Walae3777672014-07-31 09:06:17 -07004431 llvm::isa<ConstantRelocatable>(Offset));
4432 }
Jan Voung5cd240d2014-06-25 10:36:46 -07004433 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4434 }
4435 return llvm::cast<OperandX8632Mem>(legalize(Mem));
4436}
4437
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004438Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
Jan Voung1ee34162014-06-24 13:43:30 -07004439 // There aren't any 64-bit integer registers for x86-32.
4440 assert(Type != IceType_i64);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004441 Variable *Reg = Func->makeVariable(Type);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004442 if (RegNum == Variable::NoRegister)
4443 Reg->setWeightInfinite();
4444 else
4445 Reg->setRegNum(RegNum);
4446 return Reg;
4447}
4448
4449void TargetX8632::postLower() {
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004450 if (Ctx->getOptLevel() == Opt_m1)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004451 return;
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004452 // Find two-address non-SSA instructions where Dest==Src0, and set
4453 // the DestNonKillable flag to keep liveness analysis consistent.
Jim Stichnoth5d2fa0c2014-12-01 09:30:55 -08004454 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004455 if (Inst->isDeleted())
4456 continue;
Jim Stichnoth70d0a052014-11-14 15:53:46 -08004457 if (Variable *Dest = Inst->getDest()) {
4458 // TODO(stichnot): We may need to consider all source
4459 // operands, not just the first one, if using 3-address
4460 // instructions.
4461 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
4462 Inst->setDestNonKillable();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004463 }
4464 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004465}
4466
// Fill Permutation with a randomized old-register -> new-register map.
// Registers are shuffled only within equivalence classes of registers
// sharing the same properties (scratch/preserved/i8/int/FP), so the
// permuted assignment stays valid; ExcludeRegisters map to themselves.
void TargetX8632::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  // TODO(stichnot): Declaring Permutation this way loses type/size
  // information.  Fix this in conjunction with the caller-side TODO.
  assert(Permutation.size() >= RegX8632::Reg_NUM);
  // Expected upper bound on the number of registers in a single
  // equivalence class.  For x86-32, this would comprise the 8 XMM
  // registers.  This is for performance, not correctness.
  static const unsigned MaxEquivalenceClassSize = 8;
  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
  // Key is a bitmask of register properties (built below); value is the
  // list of registers sharing exactly those properties.
  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
  EquivalenceClassMap EquivalenceClasses;
  SizeT NumShuffled = 0, NumPreserved = 0;

// Build up the equivalence classes of registers by looking at the
// register properties as well as whether the registers should be
// explicitly excluded from shuffling.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (ExcludeRegisters[RegX8632::val]) {                                       \
    /* val stays the same in the resulting permutation. */                     \
    Permutation[RegX8632::val] = RegX8632::val;                                \
    ++NumPreserved;                                                            \
  } else {                                                                     \
    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
                           (isInt << 3) | (isFP << 4);                         \
    /* val is assigned to an equivalence class based on its properties. */     \
    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
  }
  REGX8632_TABLE
#undef X

  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

  // Shuffle the resulting equivalence classes.
  for (auto I : EquivalenceClasses) {
    const RegisterList &List = I.second;
    RegisterList Shuffled(List);
    std::random_shuffle(Shuffled.begin(), Shuffled.end(), RNG);
    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
      Permutation[List[SI]] = Shuffled[SI];
      ++NumShuffled;
    }
  }

  // Every register is either preserved or shuffled, exactly once.
  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);

  if (Func->isVerbose(IceV_Random)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "Register equivalence classes:\n";
    for (auto I : EquivalenceClasses) {
      Str << "{";
      const RegisterList &List = I.second;
      bool First = true;
      for (int32_t Register : List) {
        if (!First)
          Str << " ";
        First = false;
        Str << getRegName(Register, IceType_i32);
      }
      Str << "}\n";
    }
  }
}
4533
Jan Voungbc004632014-09-16 15:09:10 -07004534template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08004535 if (!ALLOW_DUMP)
4536 return;
Matt Wala928f1292014-07-07 16:50:46 -07004537 Ostream &Str = Ctx->getStrEmit();
Jim Stichnothbca2f652014-11-01 10:13:54 -07004538 Str << "$" << (int32_t)getValue();
Jan Voungbc004632014-09-16 15:09:10 -07004539}
4540
// 64-bit integer operations are lowered to 32-bit halves on x86-32, so
// a 64-bit immediate should never survive to emission time.
template <> void ConstantInteger64::emit(GlobalContext *) const {
  llvm_unreachable("Not expecting to emit 64-bit integers");
}
4544
// Emit a float constant by reference to its constant-pool label; the
// pool itself is emitted separately (see emitConstantPool).
template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  emitPoolLabel(Str);
}
4551
// Emit a double constant by reference to its constant-pool label; the
// pool itself is emitted separately (see emitConstantPool).
template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  emitPoolLabel(Str);
}
4558
// Undef values are expected to be replaced (e.g. legalized to a real
// constant) before emission; reaching this point is a compiler bug.
void ConstantUndef::emit(GlobalContext *) const {
  llvm_unreachable("undef value encountered by emitter.");
}
4562
// Construct the x86-32 lowering for global data; all state lives in the
// TargetGlobalLowering base.
TargetGlobalX8632::TargetGlobalX8632(GlobalContext *Ctx)
    : TargetGlobalLowering(Ctx) {}
Jim Stichnothde4ca712014-06-29 08:13:48 -07004565
// Emit the textual (.s) assembly for one global variable: section
// selection (.rodata/.data/.bss), visibility, alignment, label, and the
// initializer bytes.  When the ELF writer is in use the binary path
// handles globals instead and this routine is a no-op.
void TargetGlobalX8632::lowerInit(const VariableDeclaration &Var) const {
  // TODO(jvoung): handle this without text.
  if (Ctx->getFlags().UseELFWriter)
    return;

  Ostream &Str = Ctx->getStrEmit();

  const VariableDeclaration::InitializerListType &Initializers =
      Var.getInitializers();

  // If external and not initialized, this must be a cross test.
  // Don't generate a declaration for such cases.
  bool IsExternal = Var.isExternal() || Ctx->getFlags().DisableInternal;
  if (IsExternal && !Var.hasInitializer())
    return;

  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  bool IsConstant = Var.getIsConstant();
  uint32_t Align = Var.getAlignment();
  SizeT Size = Var.getNumBytes();
  IceString MangledName = Var.mangleName(Ctx);
  // With -fdata-sections-style output, each global gets its own section
  // named after it so the linker can garbage-collect unused globals.
  IceString SectionSuffix = "";
  if (Ctx->getFlags().DataSections)
    SectionSuffix = "." + MangledName;

  Str << "\t.type\t" << MangledName << ",@object\n";

  // Constant data -> .rodata; nonzero data -> .data; all-zero -> .bss.
  if (IsConstant)
    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
  else
    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";

  if (IsExternal)
    Str << "\t.globl\t" << MangledName << "\n";

  // Alignment of 1 is the default, so only emit .align when it matters.
  if (Align > 1)
    Str << "\t.align\t" << Align << "\n";

  Str << MangledName << ":\n";

  if (HasNonzeroInitializer) {
    for (VariableDeclaration::Initializer *Init : Initializers) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
                              ->getContents();
        // Emit raw bytes one at a time; mask to keep the printed value
        // in the 0..255 range regardless of char signedness.
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t.long\t";
        Str << Reloc->getDeclaration()->mangleName(Ctx);
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          // For INT32_MIN, negating Offset would overflow, so emit it
          // via the "+" branch and let the negative value print itself.
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      }
    }
  } else
    // NOTE: for non-constant zero initializers, this is BSS (no bits),
    // so an ELF writer would not write to the file, and only track
    // virtual offsets, but the .s writer still needs this .zero and
    // cannot simply use the .size to advance offsets.
    Str << "\t.zero\t" << Size << "\n";

  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
}
4647
// Traits describing how each floating-point constant pool is emitted:
// the matching Ice constant class, an unsigned integer type wide enough
// to hold the raw bit pattern, and the assembler directive / printf
// format used to print that pattern.  Only float and double are
// specialized; other types have no pool.
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
4673
// Emit the textual (.s) constant pool for one floating-point type T
// (a PoolTypeConverter specialization): a mergeable .rodata.cstN
// section followed by one labeled entry per pooled constant, written as
// the constant's exact bit pattern in hex.
template <typename T>
void TargetGlobalX8632::emitConstantPool(GlobalContext *Ctx) {
  // Note: Still used by emit IAS.
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (Constant *C : Pool) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
    typename T::IceType::PrimType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    // snprintf must have succeeded and fit (with NUL) inside buf.
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Const->emitPoolLabel(Str);
    // Trailing comment shows the human-readable value for debugging.
    Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
4703
4704void TargetGlobalX8632::lowerConstants(GlobalContext *Ctx) const {
4705 if (Ctx->getFlags().DisableTranslation)
4706 return;
4707 // No need to emit constants from the int pool since (for x86) they
4708 // are embedded as immediates in the instructions, just emit float/double.
4709 if (Ctx->getFlags().UseELFWriter) {
4710 ELFObjectWriter *Writer = Ctx->getObjectWriter();
4711 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
4712 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
4713 } else {
4714 OstreamLocker L(Ctx);
4715 emitConstantPool<PoolTypeConverter<float>>(Ctx);
4716 emitConstantPool<PoolTypeConverter<double>>(Ctx);
4717 }
4718}
4719
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004720} // end of namespace Ice