blob: c1ba40429a8f788b78ed600e2a8f731d1f3ac83b [file] [log] [blame]
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the TargetLoweringX8632 class, which
11// consists almost entirely of the lowering sequence for each
Jim Stichnoth70d0a052014-11-14 15:53:46 -080012// high-level instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070013//
14//===----------------------------------------------------------------------===//
15
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070016#include "llvm/Support/MathExtras.h"
17
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070018#include "IceCfg.h"
19#include "IceCfgNode.h"
Jim Stichnothbfb03e52014-08-26 10:29:05 -070020#include "IceClFlags.h"
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070021#include "IceDefs.h"
Jan Voungec270732015-01-12 17:00:22 -080022#include "IceELFObjectWriter.h"
Karl Schimpfe3f64d02014-10-07 10:38:22 -070023#include "IceGlobalInits.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070024#include "IceInstX8632.h"
Jim Stichnoth336f6c42014-10-30 15:01:31 -070025#include "IceLiveness.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070026#include "IceOperand.h"
Jan Voungbd385e42014-09-18 18:18:10 -070027#include "IceRegistersX8632.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070028#include "IceTargetLoweringX8632.def"
29#include "IceTargetLoweringX8632.h"
Jan Voung8acded02014-09-22 18:02:25 -070030#include "IceUtils.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070031
32namespace Ice {
33
34namespace {
35
Matt Walace0ca8f2014-07-24 12:34:20 -070036// The following table summarizes the logic for lowering the fcmp
37// instruction. There is one table entry for each of the 16 conditions.
38//
39// The first four columns describe the case when the operands are
40// floating point scalar values. A comment in lowerFcmp() describes the
41// lowering template. In the most general case, there is a compare
42// followed by two conditional branches, because some fcmp conditions
43// don't map to a single x86 conditional branch. However, in many cases
44// it is possible to swap the operands in the comparison and have a
45// single conditional branch. Since it's quite tedious to validate the
46// table by hand, good execution tests are helpful.
47//
48// The last two columns describe the case when the operands are vectors
49// of floating point values. For most fcmp conditions, there is a clear
50// mapping to a single x86 cmpps instruction variant. Some fcmp
51// conditions require special code to handle and these are marked in the
52// table with a Cmpps_Invalid predicate.
// Lowering table for fcmp, indexed by the fcmp condition (see the
// commentary above for the full description of the columns).
const struct TableFcmp_ {
  uint32_t Default;         // result value when the compare is a known constant
  bool SwapScalarOperands;  // swap operands before the scalar compare
  CondX86::BrCond C1, C2;   // branch condition(s); C2 may be Br_None
  bool SwapVectorOperands;  // swap operands before the vector cmpps
  CondX86::CmppsCond Predicate; // cmpps predicate (Cmpps_Invalid if none maps)
} TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred }              \
  ,
    FCMPX8632_TABLE
#undef X
};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
67
68// The following table summarizes the logic for lowering the icmp instruction
69// for i32 and narrower types. Each icmp condition has a clear mapping to an
70// x86 conditional branch instruction.
71
const struct TableIcmp32_ {
  CondX86::BrCond Mapping; // the single x86 branch condition for this icmp
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C_32 }                                                            \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
82
83// The following table summarizes the logic for lowering the icmp instruction
84// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
85// conditional branches are needed. For the other conditions, three separate
86// conditional branches are needed.
const struct TableIcmp64_ {
  // Up to three branch conditions are needed for an i64 icmp lowering.
  CondX86::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 }                           \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
97
Jan Voungbd385e42014-09-18 18:18:10 -070098CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070099 size_t Index = static_cast<size_t>(Cond);
100 assert(Index < TableIcmp32Size);
101 return TableIcmp32[Index].Mapping;
102}
103
// Per-type x86 attributes, indexed by IceType.  Currently only records
// the element type a vector has in its x86 (XMM) representation.
const struct TableTypeX8632Attributes_ {
  Type InVectorElementType;
} TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  { elementty }                                                                \
  ,
    ICETYPEX8632_TABLE
#undef X
};
const size_t TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);
115
116// Return the type which the elements of the vector have in the X86
117// representation of the vector.
118Type getInVectorElementType(Type Ty) {
119 assert(isVectorType(Ty));
120 size_t Index = static_cast<size_t>(Ty);
Jim Stichnoth6e992142014-07-30 14:45:20 -0700121 (void)Index;
Matt Wala49889232014-07-18 12:45:09 -0700122 assert(Index < TableTypeX8632AttributesSize);
123 return TableTypeX8632Attributes[Ty].InVectorElementType;
124}
125
// The maximum number of arguments to pass in XMM registers
const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const uint32_t X86_CHAR_BIT = 8;
// Stack alignment, in bytes
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size of the return address on the stack, in bytes
const uint32_t X86_RET_IP_SIZE_BYTES = 4;
// The number of different NOP instructions available for nop insertion
const uint32_t X86_NUM_NOP_VARIANTS = 5;
Matt Wala105b7042014-08-11 19:56:19 -0700136
// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
Matt Wala45a06232014-07-09 16:33:22 -0700142
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700143// In some cases, there are x-macros tables for both high-level and
144// low-level instructions/operands that use the same enum key value.
145// The tables are kept separate to maintain a proper separation
Jim Stichnothfac55172014-10-01 13:06:21 -0700146// between abstraction layers. There is a risk that the tables could
147// get out of sync if enum values are reordered or if entries are
148// added or deleted. The following dummy namespaces use
149// static_asserts to ensure everything is kept in sync.
150
// Validate the enum values in FCMPX8632_TABLE against the high-level
// ICEINSTFCMP_TABLE, to catch the tables drifting out of sync.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE
#undef X
} // end of namespace dummy1
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700183
// Validate the enum values in ICMPX8632_TABLE against the high-level
// ICEINSTICMP_TABLE, to catch the tables drifting out of sync.
namespace dummy2 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8632_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICMPX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy2
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700216
// Validate the enum values in ICETYPEX8632_TABLE against the high-level
// ICETYPE_TABLE, to catch the tables drifting out of sync.
namespace dummy3 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
  ICETYPEX8632_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, size, align, elts, elty, str)                                   \
  static const int _table1_##tag = tag;
ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  static const int _table2_##tag = _tmp_##tag;                                 \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPEX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, size, align, elts, elty, str)                                   \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE
#undef X
} // end of namespace dummy3
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700248
// RAII helper that temporarily overwrites a boolean flag and restores its
// original value when the scope exits.  Used to set RandomizationPoolingPause
// so that constant blinding or pooling is disabled for some translation
// phases.
class BoolFlagSaver {
public:
  // Remember F's current value and install NewValue for the scope's lifetime.
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  // Restore the saved value.
  ~BoolFlagSaver() { Flag = OldValue; }

  // Not default-constructible, copyable, or assignable.
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

private:
  const bool OldValue; // value to restore at destruction
  bool &Flag;          // the flag under management
};
264
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700265} // end of anonymous namespace
266
// Record a candidate producer instruction, precomputing whether its lowering
// is "complex" (i.e. needs more than one conditional branch).
BoolFoldingEntry::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {}
Jim Stichnotha59ae6f2015-05-17 10:11:41 -0700269
// Classify Instr as a potential producer of a foldable boolean value.
// Currently only icmp with non-i64 operands is enabled; the fcmp/trunc
// classification below is intentionally disabled (see TODOs).
BoolFolding::BoolFoldingProducerKind
BoolFolding::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_None; // TODO(stichnot): actually PK_Icmp64;
  }
  return PK_None; // TODO(stichnot): remove this
  // NOTE: the code below is intentionally unreachable until the TODO
  // above is resolved.

  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}
291
// Classify Instr as a potential consumer of a folded boolean producer.
// Currently only br and select are enabled; the sext/zext classification
// below is intentionally disabled (see TODO).
BoolFolding::BoolFoldingConsumerKind
BoolFolding::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this
  // NOTE: the code below is intentionally unreachable until the TODO
  // above is resolved.

  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}
312
// Returns true if the producing instruction has a "complex" lowering
// sequence. This generally means that its lowering sequence requires
// more than one conditional branch, namely 64-bit integer compares
// and some floating-point compares. When this is true, and there is
// more than one consumer, we prefer to disable the folding
// optimization because it minimizes branches.
bool BoolFolding::hasComplexLowering(const Inst *Instr) {
  switch (getProducerKind(Instr)) {
  default:
    return false;
  case PK_Icmp64:
    return true;
  case PK_Fcmp:
    // An fcmp lowering is complex exactly when its table entry requires
    // a second conditional branch (C2 != Br_None).
    return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
           CondX86::Br_None;
  }
}
330
// Build the Producers map for Node: one entry per bool (i1) producing
// instruction whose value might be folded into its consumer's lowering.
// Entries that turn out not to be foldable are invalidated as the scan
// proceeds; survivors are marked dead (not deleted) at the end.
void BoolFolding::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions
        && Var             // only instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
    }
    // Check each src variable against the map.
    for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
      Operand *Src = Instr.getSrc(I);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        SizeT VarNum = Var->getIndex();
        if (containsValid(VarNum)) {
          if (I != 0 // All valid consumers use Var as the first source operand
              || getConsumerKind(&Instr) == CK_None // must be white-listed
              || (Producers[VarNum].IsComplex && // complex can't be multi-use
                  Producers[VarNum].NumUses > 0)) {
            setInvalid(VarNum);
            continue;
          }
          ++Producers[VarNum].NumUses;
          // If this is the variable's last use, its producer's value does
          // not escape the block.
          if (Instr.isLastUse(Var)) {
            Producers[VarNum].IsLiveOut = false;
          }
        }
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that
    // other peephole style optimizations during or before lowering
    // have access to this instruction in undeleted form. See for
    // example tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}
381
382const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
383 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
384 if (Var == nullptr)
385 return nullptr;
386 SizeT VarNum = Var->getIndex();
387 auto Element = Producers.find(VarNum);
388 if (Element == Producers.end())
389 return nullptr;
390 return Element->second.Instr;
391}
392
// Dump the surviving foldable producers, for debugging.  No-op unless
// dumping is compiled in and verbose folding output is enabled.
void BoolFolding::dump(const Cfg *Func) const {
  if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    // Skip entries that were invalidated during init().
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n  ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}
406
// Per-node setup before lowering: (re)compute the bool-folding candidates
// for Node, then optionally dump them.
void TargetX8632::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}
411
TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) {
  static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==
                    (TargetInstructionSet::X86InstructionSet_End -
                     TargetInstructionSet::X86InstructionSet_Begin),
                "X86InstructionSet range different from TargetInstructionSet");
  // Translate the target-independent instruction-set selection into the
  // x86-specific enum, unless the generic default was requested.
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<X86InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        X86InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
  ScratchRegs.resize(RegX8632::Reg_NUM);
// Populate the per-class register bit vectors from the register table.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[RegX8632::val] = isInt;                                     \
  IntegerRegistersI8[RegX8632::val] = isI8;                                    \
  FloatRegisters[RegX8632::val] = isFP;                                        \
  VectorRegisters[RegX8632::val] = isFP;                                       \
  ScratchRegs[RegX8632::val] = scratch;
  REGX8632_TABLE;
#undef X
  // Associate each IceType with the set of registers that may hold it.
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
458
// Full -O2 translation pipeline: phi lowering, address-mode and RMW
// optimization, target lowering, register allocation, frame finalization,
// and late CFG/branch cleanups.  The phase ordering below is significant.
void TargetX8632::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    // We need to pause constant blinding or pooling during advanced
    // phi lowering, unless the lowering assignment has a physical
    // register for the dest Variable.
    {
      BoolFlagSaver B(RandomizationPoolingPaused, true);
      Func->advancedPhiLowering();
    }
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
570
// Minimal -Om1 translation pipeline: phi lowering, argument lowering,
// code generation, infinite-weight-only register allocation, and frame
// finalization.  No optimization passes are run.
void TargetX8632::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
607
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -0700608namespace {
609
// Return true if the arithmetic instruction is a candidate for the x86
// read-modify-write (load/op/store) transformation performed by findRMW().
bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW
  // option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    return false; // TODO(stichnot): implement
    // Intentionally unreachable until the TODO above is done: shifts
    // would then be allowed for non-i64 types only.
    return !isI64;
  }
}
639
640bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
641 if (A == B)
642 return true;
643 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
644 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
645 return MemA->getBase() == MemB->getBase() &&
646 MemA->getOffset() == MemB->getOffset() &&
647 MemA->getIndex() == MemB->getIndex() &&
648 MemA->getShift() == MemB->getShift() &&
649 MemA->getSegmentRegister() == MemB->getSegmentRegister();
650 }
651 }
652 return false;
653}
654
655} // end of anonymous namespace
656
// Scans every node for the three-instruction pattern
//   a = Load addr; b = <op> a, other; Store b, addr
// and, when found, inserts a FakeDef/FakeRMW pair ahead of the Store so
// that lowering can emit a memory-destination (read-modify-write) x86
// instruction instead of a separate load and store.
void TargetX8632::findRMW() {
  Func->dump("Before RMW");
  // NOTE(review): the dump-stream lock is held for the whole pass even
  // when IceV_RMW verbosity is off — presumably cheap; confirm if this
  // ever runs with contended translation threads.
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening deleted
    // instructions, or intervening instructions that could be safely moved out
    // of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    // I1/I2/I3 form a sliding window of three live instructions; I1 and I2
    // start at end() so the first two iterations only prime the window.
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
          if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
            // Look for:
            //   a = Load addr
            //   b = <op> a, other
            //   Store b, addr
            // Change to:
            //   a = Load addr
            //   b = <op> a, other
            //   x = FakeDef
            //   RMW <op>, addr, other, x
            //   b = Store b, addr, x
            // Note that inferTwoAddress() makes sure setDestNonKillable() gets
            // called on the updated Store instruction, to avoid liveness
            // problems later.
            //
            // With this transformation, the Store instruction acquires a Dest
            // variable and is now subject to dead code elimination if there are
            // no more uses of "b".  Variable "x" is a beacon for determining
            // whether the Store instruction gets dead-code eliminated.  If the
            // Store instruction is eliminated, then it must be the case that
            // the RMW instruction ends x's live range, and therefore the RMW
            // instruction will be retained and later lowered.  On the other
            // hand, if the RMW instruction does not end x's live range, then
            // the Store instruction must still be present, and therefore the
            // RMW instruction is ignored during lowering because it is
            // redundant with the Store instruction.
            //
            // Note that if "a" has further uses, the RMW transformation may
            // still trigger, resulting in two loads and one store, which is
            // worse than the original one load and one store.  However, this is
            // probably rare, and caching probably keeps it just as fast.
            if (!isSameMemAddressOperand(Load->getSourceAddress(),
                                         Store->getAddr()))
              continue;
            Operand *ArithSrcFromLoad = Arith->getSrc(0);
            Operand *ArithSrcOther = Arith->getSrc(1);
            // The loaded value may appear as either arith operand, but only
            // a commutative op lets us swap it into the first position.
            if (ArithSrcFromLoad != Load->getDest()) {
              if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
                continue;
              std::swap(ArithSrcFromLoad, ArithSrcOther);
            }
            if (Arith->getDest() != Store->getData())
              continue;
            if (!canRMW(Arith))
              continue;
            if (Func->isVerbose(IceV_RMW)) {
              Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
              Load->dump(Func);
              Str << "\n ";
              Arith->dump(Func);
              Str << "\n ";
              Store->dump(Func);
              Str << "\n";
            }
            // The beacon gets zero weight so the register allocator never
            // assigns it a register; it only exists for liveness tracking.
            Variable *Beacon = Func->makeVariable(IceType_i32);
            Beacon->setWeight(0);
            Store->setRmwBeacon(Beacon);
            InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
            // Both inserts are before I3 (the Store), so the FakeDef ends up
            // ahead of the FakeRMW as required.
            Node->getInsts().insert(I3, BeaconDef);
            InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
                Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
            Node->getInsts().insert(I3, RMW);
          }
        }
      }
    }
  }
}
748
749namespace {
750
// Extracts the constant value from a ConstantInteger32 operand, returning
// MemoryOrderInvalid for any other kind of operand.
uint64_t getConstantMemoryOrder(Operand *Opnd) {
  const auto *IntConst = llvm::dyn_cast<ConstantInteger32>(Opnd);
  if (IntConst == nullptr)
    return Intrinsics::MemoryOrderInvalid;
  return IntConst->getValue();
}
758
759// Determines whether the dest of a Load instruction can be folded
760// into one of the src operands of a 2-operand instruction. This is
761// true as long as the load dest matches exactly one of the binary
762// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
763// the answer is true.
764bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
765 Operand *&Src0, Operand *&Src1) {
766 if (Src0 == LoadDest && Src1 != LoadDest) {
767 Src0 = LoadSrc;
768 return true;
769 }
770 if (Src0 != LoadDest && Src1 == LoadDest) {
771 Src1 = LoadSrc;
772 return true;
773 }
774 return false;
775}
776
777} // end of anonymous namespace
778
// Folds a load (or single-instruction atomic load) into the immediately
// following arithmetic/compare/select/cast instruction when that
// instruction is the last use of the load's dest, replacing both with one
// new instruction whose operand is the memory reference itself.
void TargetX8632::doLoadOpt() {
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load
      // instruction or equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        // Legalization is deferred; the folded instruction's lowering
        // legalizes the memory operand itself.
        const bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid
        // memory ordering, and can be implemented in a single
        // instruction (i.e., not i64).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            Intrin->getDest()->getType() != IceType_i64 &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          const bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following
      // instruction only if the following instruction ends the Load's
      // Dest variable's live range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast
          // instruction.
          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering
          // may benefit.  Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}
868
Jim Stichnothff9c7062014-09-18 04:50:49 -0700869bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
871 return Br->optimizeBranch(NextNode);
872 }
873 return false;
874}
875
// Canonical (32-bit) register names, expanded from the register table via
// the X macro; indexed by RegX8632 register number.
IceString TargetX8632::RegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};
883
// Returns the Variable standing for physical register RegNum viewed as
// type Ty (void defaults to i32).  Variables are created lazily and cached
// per (type, register) pair in PhysicalRegisters.
Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  // Lazily size this type's cache to cover every register number.
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered
    // live upon function entry.
    if (RegNum == RegX8632::Reg_esp) {
      Func->addImplicitArg(Reg);
      // esp's liveness is managed explicitly, not by the analysis.
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}
904
// Returns the assembly name for register RegNum, using the alias whose
// width matches Ty: 8-bit names for i1/i8, 16-bit names for i16, and the
// canonical 32-bit name for everything else.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegX8632::Reg_NUM);
  // 8-bit register aliases, expanded from the register table.
  static IceString ByteRegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
      REGX8632_TABLE
#undef X
  };
  // 16-bit register aliases, expanded from the register table.
  static IceString WordRegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
      REGX8632_TABLE
#undef X
  };
  if (Ty == IceType_i1 || Ty == IceType_i8)
    return ByteRegNames[RegNum];
  if (Ty == IceType_i16)
    return WordRegNames[RegNum];
  return RegNames[RegNum];
}
931
// Emits the assembly operand for Var: a register name ("%eax") when
// register-allocated, otherwise a stack reference ("disp(%ebp)" or
// "disp(%esp)").
void TargetX8632::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  // An infinite-weight variable must have been register-allocated.
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  // When addressing off esp, fold in the current push/pop adjustment so
  // the displacement stays correct mid-call-sequence.
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // Omit a zero displacement: "(%esp)" rather than "0(%esp)".
  if (Offset)
    Str << Offset;
  // The frame/stack pointer is always printed with its 32-bit name.
  const Type FrameSPTy = IceType_i32;
  Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
}
949
// Converts a stack-resident Variable into an integrated-assembler Address
// of the form disp(frame-or-stack-register).  Register-allocated and
// infinite-weight variables are invalid inputs.
X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf())
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  // When addressing off esp, fold in the current push/pop adjustment.
  const int32_t Adjustment = hasFramePointer() ? 0 : getStackAdjustment();
  const int32_t Displacement = Var->getStackOffset() + Adjustment;
  return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()),
                        Displacement);
}
961
// Rewrites register-passed vector arguments: the first four vector-typed
// arguments, regardless of their position among the other arguments, are
// passed in xmm0 - xmm3.  Each such argument is replaced in the argument
// list by a copy pinned to its home register, and a prolog assignment
// copies the home register into the argument's assigned location.
void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  unsigned XmmArgCount = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT Index = 0, NumArgs = Args.size(); Index < NumArgs; ++Index) {
    // Once all xmm argument registers are used, nothing is left to do.
    if (XmmArgCount >= X86_MAX_XMM_ARGS)
      break;
    Variable *Arg = Args[Index];
    const Type ArgTy = Arg->getType();
    if (!isVectorType(ArgTy))
      continue;
    // Build the home-register stand-in for Arg.
    const int32_t HomeRegNum = RegX8632::Reg_xmm0 + XmmArgCount;
    ++XmmArgCount;
    Variable *RegisterArg = Func->makeVariable(ArgTy);
    if (ALLOW_DUMP)
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(HomeRegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    // Swap the stand-in into the argument list and copy it into the
    // original Arg in the prolog.
    Args[Index] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}
994
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack.  This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width.  For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture.  Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    // Lo is handled first so it lands at the lower address (little-endian).
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  // Vector arguments on the stack are aligned to the stack alignment.
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument was register-allocated, copy it from its stack home
  // into the register.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandX8632Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}
1038
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001039Type TargetX8632::stackSlotType() { return IceType_i32; }
1040
// Builds the function prologue: pushes preserved registers, optionally
// establishes ebp as the frame pointer, carves out and aligns the spill
// areas, assigns frame offsets to stack arguments and spilled variables,
// and (in verbose mode) dumps the resulting frame layout.
void TargetX8632::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES:  area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes:  area 3
  //  * GlobalsSize:            area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize:    area 6
  //  * SpillAreaSizeBytes:     areas 3 - 7

  // Determine stack frame offsets for each Variable without a
  // register assignment.  This can be done as one variable per stack
  // slot.  Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse
  // that stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
          assert(Var->getWeight().isZero());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
    // Keep ebp live for late-stage liveness analysis
    // (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, ebp));
  }

  // Align the variables area.  SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(RegX8632::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated.  Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  // In verbose mode, dump the computed layout for debugging.
  if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
1236
// Builds the function epilogue just before the node's ret instruction:
// restores esp (and ebp for an ebp-based frame), pops preserved
// registers, and under sandboxing rewrites the ret into a bundle-aligned
// masked indirect jump.  Does nothing if the node has no ret.
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Search backwards for the ret instruction.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of esp before the assignment of esp=ebp keeps
    // previous esp adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, esp));
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of
  // the prolog's pushes.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    // ebp was already restored above for an ebp-based frame.
    if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }

  if (!Ctx->getFlags().getUseSandboxing())
    return;
  // Change the original ret instruction into a sandboxed return sequence.
  // t:ecx = pop
  // bundle_lock
  // and t, ~31
  // jmp *t
  // bundle_unlock
  // FakeUse <original_ret_operand>
  const SizeT BundleSize = 1
                           << Func->getAssembler<>()->getBundleAlignLog2Bytes();
  Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
  _pop(T_ecx);
  _bundle_lock();
  _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
  _jmp(T_ecx);
  _bundle_unlock();
  // Keep the original return value (if any) live through the new sequence.
  if (RI->getSrcSize()) {
    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert(InstFakeUse::create(Func, RetValue));
  }
  RI->setDeleted();
}
1304
// Splits a 64-bit variable into two i32 halves (Lo and Hi), creating them
// on first use; subsequent calls are no-ops.  Non-64-bit variables are
// left untouched.
// TODO: Only consider F64 if we need to push each half when
// passing as an argument to a function call.  Note that each half
// is still typed as I32.
void TargetX8632::split64(Variable *Var) {
  const Type VarTy = Var->getType();
  if (VarTy != IceType_i64 && VarTy != IceType_f64)
    return;
  Variable *LoHalf = Var->getLo();
  Variable *HiHalf = Var->getHi();
  if (LoHalf != nullptr) {
    // Already split; the halves always exist as a pair.
    assert(HiHalf);
    return;
  }
  assert(HiHalf == nullptr);
  LoHalf = Func->makeVariable(IceType_i32);
  HiHalf = Func->makeVariable(IceType_i32);
  if (ALLOW_DUMP) {
    LoHalf->setName(Func, Var->getName(Func) + "__lo");
    HiHalf->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(LoHalf, HiHalf);
  // Argument-ness propagates to both halves.
  if (Var->getIsArg()) {
    LoHalf->setIsArg();
    HiHalf->setIsArg();
  }
}
1335
1336Operand *TargetX8632::loOperand(Operand *Operand) {
Jim Stichnothc77f8172015-05-31 23:34:44 -07001337 assert(Operand->getType() == IceType_i64 ||
1338 Operand->getType() == IceType_f64);
1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001340 return Operand;
1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1342 split64(Var);
1343 return Var->getLo();
1344 }
Jan Voungbc004632014-09-16 15:09:10 -07001345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
Qining Lu253dc8a2015-06-22 10:10:23 -07001346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
1348 return legalize(ConstInt);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001349 }
1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
Qining Lu253dc8a2015-06-22 10:10:23 -07001351 OperandX8632Mem *MemOperand = OperandX8632Mem::create(
1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
1353 Mem->getShift(), Mem->getSegmentRegister());
1354 // Test if we should randomize or pool the offset, if so randomize it or
1355 // pool it then create mem operand with the blinded/pooled constant.
1356 // Otherwise, return the mem operand as ordinary mem operand.
1357 return legalize(MemOperand);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001358 }
1359 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001360 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001361}
1362
1363Operand *TargetX8632::hiOperand(Operand *Operand) {
Jim Stichnothc77f8172015-05-31 23:34:44 -07001364 assert(Operand->getType() == IceType_i64 ||
1365 Operand->getType() == IceType_f64);
1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001367 return Operand;
1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1369 split64(Var);
1370 return Var->getHi();
1371 }
Jan Voungbc004632014-09-16 15:09:10 -07001372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
Qining Lu253dc8a2015-06-22 10:10:23 -07001373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1374 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
1375 // check if we need to blind/pool the constant
1376 return legalize(ConstInt);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001377 }
1378 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1379 Constant *Offset = Mem->getOffset();
Jim Stichnothae953202014-12-20 06:17:49 -08001380 if (Offset == nullptr) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001381 Offset = Ctx->getConstantInt32(4);
1382 } else if (ConstantInteger32 *IntOffset =
1383 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1384 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001385 } else if (ConstantRelocatable *SymOffset =
1386 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
Jan Voungfe14fb82014-10-13 15:56:32 -07001387 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001388 Offset =
1389 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1390 SymOffset->getSuppressMangling());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001391 }
Qining Lu253dc8a2015-06-22 10:10:23 -07001392 OperandX8632Mem *MemOperand = OperandX8632Mem::create(
1393 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
1394 Mem->getShift(), Mem->getSegmentRegister());
1395 // Test if the Offset is an eligible i32 constants for randomization and
1396 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
1397 // operand.
1398 return legalize(MemOperand);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001399 }
1400 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001401 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001402}
1403
// Returns the set of physical registers selected by the Include mask and
// then pruned by the Exclude mask.  The four RegSet categories
// (caller-save/scratch, callee-save/preserved, stack pointer, frame
// pointer) are tested independently; because all Exclude tests run after
// all Include tests, exclusion wins whenever a register is named by both
// masks.
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

// The X macro below is expanded once per register by REGX8632_TABLE; the
// scratch/preserved/stackptr/frameptr columns of the table indicate which
// RegSet categories each register belongs to.  (No comments inside the
// macro body: line splicing would fold a "//" comment into the next line.)
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
1433
1434void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
1435 IsEbpBasedFrame = true;
Matt Wala105b7042014-08-11 19:56:19 -07001436 // Conservatively require the stack to be aligned. Some stack
1437 // adjustment operations implemented below assume that the stack is
1438 // aligned before the alloca. All the alloca code ensures that the
1439 // stack alignment is preserved after the alloca. The stack alignment
1440 // restriction can be relaxed in some cases.
1441 NeedsStackAlignment = true;
1442
Jan Voung55500db2015-05-26 14:25:40 -07001443 // TODO(stichnot): minimize the number of adjustments of esp, etc.
Jan Voungbd385e42014-09-18 18:18:10 -07001444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001445 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1446 Variable *Dest = Inst->getDest();
Matt Wala105b7042014-08-11 19:56:19 -07001447 uint32_t AlignmentParam = Inst->getAlignInBytes();
Jim Stichnoth72a8f8d2014-09-08 17:56:50 -07001448 // For default align=0, set it to the real value 1, to avoid any
1449 // bit-manipulation problems below.
1450 AlignmentParam = std::max(AlignmentParam, 1u);
Matt Wala105b7042014-08-11 19:56:19 -07001451
1452 // LLVM enforces power of 2 alignment.
Jan Voung55500db2015-05-26 14:25:40 -07001453 assert(llvm::isPowerOf2_32(AlignmentParam));
1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES));
Matt Wala105b7042014-08-11 19:56:19 -07001455
1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001458 _and(esp, Ctx->getConstantInt32(-Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001459 }
Jan Voung55500db2015-05-26 14:25:40 -07001460 if (const auto *ConstantTotalSize =
Jan Voungbc004632014-09-16 15:09:10 -07001461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
Matt Wala105b7042014-08-11 19:56:19 -07001462 uint32_t Value = ConstantTotalSize->getValue();
Jan Voung55500db2015-05-26 14:25:40 -07001463 Value = Utils::applyAlignment(Value, Alignment);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001464 _sub(esp, Ctx->getConstantInt32(Value));
Matt Wala105b7042014-08-11 19:56:19 -07001465 } else {
1466 // Non-constant sizes need to be adjusted to the next highest
1467 // multiple of the required alignment at runtime.
1468 Variable *T = makeReg(IceType_i32);
1469 _mov(T, TotalSize);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001470 _add(T, Ctx->getConstantInt32(Alignment - 1));
1471 _and(T, Ctx->getConstantInt32(-Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001472 _sub(esp, T);
1473 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001474 _mov(Dest, esp);
1475}
1476
Jim Stichnoth0933c0c2015-06-12 10:41:16 -07001477// Strength-reduce scalar integer multiplication by a constant (for
1478// i32 or narrower) for certain constants. The lea instruction can be
1479// used to multiply by 3, 5, or 9, and the lsh instruction can be used
1480// to multiply by powers of 2. These can be combined such that
1481// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
1482// combined with left-shifting by 2.
1483bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
1484 int32_t Src1) {
1485 // Disable this optimization for Om1 and O0, just to keep things
1486 // simple there.
1487 if (Ctx->getFlags().getOptLevel() < Opt_1)
1488 return false;
1489 Type Ty = Dest->getType();
1490 Variable *T = nullptr;
1491 if (Src1 == -1) {
1492 _mov(T, Src0);
1493 _neg(T);
1494 _mov(Dest, T);
1495 return true;
1496 }
1497 if (Src1 == 0) {
1498 _mov(Dest, Ctx->getConstantZero(Ty));
1499 return true;
1500 }
1501 if (Src1 == 1) {
1502 _mov(T, Src0);
1503 _mov(Dest, T);
1504 return true;
1505 }
1506 // Don't bother with the edge case where Src1 == MININT.
1507 if (Src1 == -Src1)
1508 return false;
1509 const bool Src1IsNegative = Src1 < 0;
1510 if (Src1IsNegative)
1511 Src1 = -Src1;
1512 uint32_t Count9 = 0;
1513 uint32_t Count5 = 0;
1514 uint32_t Count3 = 0;
1515 uint32_t Count2 = 0;
1516 uint32_t CountOps = 0;
1517 while (Src1 > 1) {
1518 if (Src1 % 9 == 0) {
1519 ++CountOps;
1520 ++Count9;
1521 Src1 /= 9;
1522 } else if (Src1 % 5 == 0) {
1523 ++CountOps;
1524 ++Count5;
1525 Src1 /= 5;
1526 } else if (Src1 % 3 == 0) {
1527 ++CountOps;
1528 ++Count3;
1529 Src1 /= 3;
1530 } else if (Src1 % 2 == 0) {
1531 if (Count2 == 0)
1532 ++CountOps;
1533 ++Count2;
1534 Src1 /= 2;
1535 } else {
1536 return false;
1537 }
1538 }
1539 // Lea optimization only works for i16 and i32 types, not i8.
1540 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1541 return false;
1542 // Limit the number of lea/shl operations for a single multiply, to
1543 // a somewhat arbitrary choice of 3.
1544 const uint32_t MaxOpsForOptimizedMul = 3;
1545 if (CountOps > MaxOpsForOptimizedMul)
1546 return false;
1547 _mov(T, Src0);
1548 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1549 for (uint32_t i = 0; i < Count9; ++i) {
1550 const uint16_t Shift = 3; // log2(9-1)
1551 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1552 _set_dest_nonkillable();
1553 }
1554 for (uint32_t i = 0; i < Count5; ++i) {
1555 const uint16_t Shift = 2; // log2(5-1)
1556 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1557 _set_dest_nonkillable();
1558 }
1559 for (uint32_t i = 0; i < Count3; ++i) {
1560 const uint16_t Shift = 1; // log2(3-1)
1561 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1562 _set_dest_nonkillable();
1563 }
1564 if (Count2) {
1565 _shl(T, Ctx->getConstantInt(Ty, Count2));
1566 }
1567 if (Src1IsNegative)
1568 _neg(T);
1569 _mov(Dest, T);
1570 return true;
1571}
1572
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001573void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1574 Variable *Dest = Inst->getDest();
1575 Operand *Src0 = legalize(Inst->getSrc(0));
1576 Operand *Src1 = legalize(Inst->getSrc(1));
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07001577 if (Inst->isCommutative()) {
1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1579 std::swap(Src0, Src1);
Jim Stichnoth0933c0c2015-06-12 10:41:16 -07001580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1581 std::swap(Src0, Src1);
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07001582 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001583 if (Dest->getType() == IceType_i64) {
Qining Lu253dc8a2015-06-22 10:10:23 -07001584 // These helper-call-involved instructions are lowered in this
1585 // separate switch. This is because loOperand() and hiOperand()
1586 // may insert redundant instructions for constant blinding and
1587 // pooling. Such redundant instructions will fail liveness analysis
1588 // under -Om1 setting. And, actually these arguments do not need
1589 // to be processed with loOperand() and hiOperand() to be used.
1590 switch (Inst->getOp()) {
1591 case InstArithmetic::Udiv: {
1592 const SizeT MaxSrcs = 2;
1593 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1594 Call->addArg(Inst->getSrc(0));
1595 Call->addArg(Inst->getSrc(1));
1596 lowerCall(Call);
1597 return;
1598 }
1599 case InstArithmetic::Sdiv: {
1600 const SizeT MaxSrcs = 2;
1601 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1602 Call->addArg(Inst->getSrc(0));
1603 Call->addArg(Inst->getSrc(1));
1604 lowerCall(Call);
1605 return;
1606 }
1607 case InstArithmetic::Urem: {
1608 const SizeT MaxSrcs = 2;
1609 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1610 Call->addArg(Inst->getSrc(0));
1611 Call->addArg(Inst->getSrc(1));
1612 lowerCall(Call);
1613 return;
1614 }
1615 case InstArithmetic::Srem: {
1616 const SizeT MaxSrcs = 2;
1617 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1618 Call->addArg(Inst->getSrc(0));
1619 Call->addArg(Inst->getSrc(1));
1620 lowerCall(Call);
1621 return;
1622 }
1623 default:
1624 break;
1625 }
1626
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001627 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1628 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1629 Operand *Src0Lo = loOperand(Src0);
1630 Operand *Src0Hi = hiOperand(Src0);
1631 Operand *Src1Lo = loOperand(Src1);
1632 Operand *Src1Hi = hiOperand(Src1);
Jim Stichnothae953202014-12-20 06:17:49 -08001633 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001634 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001635 case InstArithmetic::_num:
1636 llvm_unreachable("Unknown arithmetic operator");
1637 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001638 case InstArithmetic::Add:
1639 _mov(T_Lo, Src0Lo);
1640 _add(T_Lo, Src1Lo);
1641 _mov(DestLo, T_Lo);
1642 _mov(T_Hi, Src0Hi);
1643 _adc(T_Hi, Src1Hi);
1644 _mov(DestHi, T_Hi);
1645 break;
1646 case InstArithmetic::And:
1647 _mov(T_Lo, Src0Lo);
1648 _and(T_Lo, Src1Lo);
1649 _mov(DestLo, T_Lo);
1650 _mov(T_Hi, Src0Hi);
1651 _and(T_Hi, Src1Hi);
1652 _mov(DestHi, T_Hi);
1653 break;
1654 case InstArithmetic::Or:
1655 _mov(T_Lo, Src0Lo);
1656 _or(T_Lo, Src1Lo);
1657 _mov(DestLo, T_Lo);
1658 _mov(T_Hi, Src0Hi);
1659 _or(T_Hi, Src1Hi);
1660 _mov(DestHi, T_Hi);
1661 break;
1662 case InstArithmetic::Xor:
1663 _mov(T_Lo, Src0Lo);
1664 _xor(T_Lo, Src1Lo);
1665 _mov(DestLo, T_Lo);
1666 _mov(T_Hi, Src0Hi);
1667 _xor(T_Hi, Src1Hi);
1668 _mov(DestHi, T_Hi);
1669 break;
1670 case InstArithmetic::Sub:
1671 _mov(T_Lo, Src0Lo);
1672 _sub(T_Lo, Src1Lo);
1673 _mov(DestLo, T_Lo);
1674 _mov(T_Hi, Src0Hi);
1675 _sbb(T_Hi, Src1Hi);
1676 _mov(DestHi, T_Hi);
1677 break;
1678 case InstArithmetic::Mul: {
Jim Stichnothae953202014-12-20 06:17:49 -08001679 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jan Voungbd385e42014-09-18 18:18:10 -07001680 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1681 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001682 // gcc does the following:
1683 // a=b*c ==>
1684 // t1 = b.hi; t1 *=(imul) c.lo
1685 // t2 = c.hi; t2 *=(imul) b.lo
1686 // t3:eax = b.lo
1687 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1688 // a.lo = t4.lo
1689 // t4.hi += t1
1690 // t4.hi += t2
1691 // a.hi = t4.hi
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07001692 // The mul instruction cannot take an immediate operand.
1693 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001694 _mov(T_1, Src0Hi);
1695 _imul(T_1, Src1Lo);
1696 _mov(T_2, Src1Hi);
1697 _imul(T_2, Src0Lo);
Jan Voungbd385e42014-09-18 18:18:10 -07001698 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001699 _mul(T_4Lo, T_3, Src1Lo);
1700 // The mul instruction produces two dest variables, edx:eax. We
1701 // create a fake definition of edx to account for this.
1702 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1703 _mov(DestLo, T_4Lo);
1704 _add(T_4Hi, T_1);
1705 _add(T_4Hi, T_2);
1706 _mov(DestHi, T_4Hi);
1707 } break;
1708 case InstArithmetic::Shl: {
1709 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1710 // gcc does the following:
1711 // a=b<<c ==>
1712 // t1:ecx = c.lo & 0xff
1713 // t2 = b.lo
1714 // t3 = b.hi
1715 // t3 = shld t3, t2, t1
1716 // t2 = shl t2, t1
1717 // test t1, 0x20
1718 // je L1
1719 // use(t3)
1720 // t3 = t2
1721 // t2 = 0
1722 // L1:
1723 // a.lo = t2
1724 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001725 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001726 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001727 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001728 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001729 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001730 _mov(T_2, Src0Lo);
1731 _mov(T_3, Src0Hi);
1732 _shld(T_3, T_2, T_1);
1733 _shl(T_2, T_1);
1734 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001735 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001736 // T_2 and T_3 are being assigned again because of the
1737 // intra-block control flow, so we need the _mov_nonkillable
1738 // variant to avoid liveness problems.
1739 _mov_nonkillable(T_3, T_2);
1740 _mov_nonkillable(T_2, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001741 Context.insert(Label);
1742 _mov(DestLo, T_2);
1743 _mov(DestHi, T_3);
1744 } break;
1745 case InstArithmetic::Lshr: {
1746 // a=b>>c (unsigned) ==>
1747 // t1:ecx = c.lo & 0xff
1748 // t2 = b.lo
1749 // t3 = b.hi
1750 // t2 = shrd t2, t3, t1
1751 // t3 = shr t3, t1
1752 // test t1, 0x20
1753 // je L1
1754 // use(t2)
1755 // t2 = t3
1756 // t3 = 0
1757 // L1:
1758 // a.lo = t2
1759 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001760 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001761 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001762 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001763 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001764 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001765 _mov(T_2, Src0Lo);
1766 _mov(T_3, Src0Hi);
1767 _shrd(T_2, T_3, T_1);
1768 _shr(T_3, T_1);
1769 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001770 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001771 // T_2 and T_3 are being assigned again because of the
1772 // intra-block control flow, so we need the _mov_nonkillable
1773 // variant to avoid liveness problems.
1774 _mov_nonkillable(T_2, T_3);
1775 _mov_nonkillable(T_3, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001776 Context.insert(Label);
1777 _mov(DestLo, T_2);
1778 _mov(DestHi, T_3);
1779 } break;
1780 case InstArithmetic::Ashr: {
1781 // a=b>>c (signed) ==>
1782 // t1:ecx = c.lo & 0xff
1783 // t2 = b.lo
1784 // t3 = b.hi
1785 // t2 = shrd t2, t3, t1
1786 // t3 = sar t3, t1
1787 // test t1, 0x20
1788 // je L1
1789 // use(t2)
1790 // t2 = t3
1791 // t3 = sar t3, 0x1f
1792 // L1:
1793 // a.lo = t2
1794 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001795 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001796 Constant *BitTest = Ctx->getConstantInt32(0x20);
1797 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001798 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001799 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001800 _mov(T_2, Src0Lo);
1801 _mov(T_3, Src0Hi);
1802 _shrd(T_2, T_3, T_1);
1803 _sar(T_3, T_1);
1804 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001805 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001806 // T_2 and T_3 are being assigned again because of the
1807 // intra-block control flow, so T_2 needs the _mov_nonkillable
1808 // variant to avoid liveness problems. T_3 doesn't need special
1809 // treatment because it is reassigned via _sar instead of _mov.
1810 _mov_nonkillable(T_2, T_3);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001811 _sar(T_3, SignExtend);
1812 Context.insert(Label);
1813 _mov(DestLo, T_2);
1814 _mov(DestHi, T_3);
1815 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001816 case InstArithmetic::Fadd:
1817 case InstArithmetic::Fsub:
1818 case InstArithmetic::Fmul:
1819 case InstArithmetic::Fdiv:
1820 case InstArithmetic::Frem:
1821 llvm_unreachable("FP instruction with i64 type");
1822 break;
Qining Lu253dc8a2015-06-22 10:10:23 -07001823 case InstArithmetic::Udiv:
1824 case InstArithmetic::Sdiv:
1825 case InstArithmetic::Urem:
1826 case InstArithmetic::Srem:
1827 llvm_unreachable("Call-helper-involved instruction for i64 type \
1828 should have already been handled before");
1829 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001830 }
Jim Stichnoth0933c0c2015-06-12 10:41:16 -07001831 return;
1832 }
1833 if (isVectorType(Dest->getType())) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001834 // TODO: Trap on integer divide and integer modulo by zero.
1835 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
Jim Stichnothf79d2cb2015-03-23 15:10:54 -07001836 if (llvm::isa<OperandX8632Mem>(Src1))
1837 Src1 = legalizeToVar(Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001838 switch (Inst->getOp()) {
1839 case InstArithmetic::_num:
1840 llvm_unreachable("Unknown arithmetic operator");
1841 break;
Matt Wala7fa22d82014-07-17 12:41:31 -07001842 case InstArithmetic::Add: {
1843 Variable *T = makeReg(Dest->getType());
1844 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001845 _padd(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001846 _movp(Dest, T);
1847 } break;
1848 case InstArithmetic::And: {
1849 Variable *T = makeReg(Dest->getType());
1850 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001851 _pand(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001852 _movp(Dest, T);
1853 } break;
1854 case InstArithmetic::Or: {
1855 Variable *T = makeReg(Dest->getType());
1856 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001857 _por(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001858 _movp(Dest, T);
1859 } break;
1860 case InstArithmetic::Xor: {
1861 Variable *T = makeReg(Dest->getType());
1862 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001863 _pxor(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001864 _movp(Dest, T);
1865 } break;
1866 case InstArithmetic::Sub: {
1867 Variable *T = makeReg(Dest->getType());
1868 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001869 _psub(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001870 _movp(Dest, T);
1871 } break;
1872 case InstArithmetic::Mul: {
Matt Wala0a450512014-07-30 12:44:39 -07001873 bool TypesAreValidForPmull =
1874 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1875 bool InstructionSetIsValidForPmull =
1876 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1878 Variable *T = makeReg(Dest->getType());
1879 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001880 _pmull(T, Src1);
Matt Wala0a450512014-07-30 12:44:39 -07001881 _movp(Dest, T);
1882 } else if (Dest->getType() == IceType_v4i32) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001883 // Lowering sequence:
1884 // Note: The mask arguments have index 0 on the left.
1885 //
1886 // movups T1, Src0
1887 // pshufd T2, Src0, {1,0,3,0}
1888 // pshufd T3, Src1, {1,0,3,0}
1889 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1890 // pmuludq T1, Src1
1891 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1892 // pmuludq T2, T3
1893 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1894 // shufps T1, T2, {0,2,0,2}
1895 // pshufd T4, T1, {0,2,1,3}
1896 // movups Dest, T4
Matt Wala7fa22d82014-07-17 12:41:31 -07001897
1898 // Mask that directs pshufd to create a vector with entries
1899 // Src[1, 0, 3, 0]
1900 const unsigned Constant1030 = 0x31;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001901 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
Matt Wala7fa22d82014-07-17 12:41:31 -07001902 // Mask that directs shufps to create a vector with entries
1903 // Dest[0, 2], Src[0, 2]
1904 const unsigned Mask0202 = 0x88;
1905 // Mask that directs pshufd to create a vector with entries
1906 // Src[0, 2, 1, 3]
1907 const unsigned Mask0213 = 0xd8;
1908 Variable *T1 = makeReg(IceType_v4i32);
1909 Variable *T2 = makeReg(IceType_v4i32);
1910 Variable *T3 = makeReg(IceType_v4i32);
1911 Variable *T4 = makeReg(IceType_v4i32);
1912 _movp(T1, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001913 _pshufd(T2, Src0, Mask1030);
1914 _pshufd(T3, Src1, Mask1030);
1915 _pmuludq(T1, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001916 _pmuludq(T2, T3);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001917 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1918 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
Matt Wala7fa22d82014-07-17 12:41:31 -07001919 _movp(Dest, T4);
Matt Wala7fa22d82014-07-17 12:41:31 -07001920 } else {
1921 assert(Dest->getType() == IceType_v16i8);
Matt Walaafeaee42014-08-07 13:47:30 -07001922 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001923 }
1924 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001925 case InstArithmetic::Shl:
1926 case InstArithmetic::Lshr:
1927 case InstArithmetic::Ashr:
1928 case InstArithmetic::Udiv:
1929 case InstArithmetic::Urem:
1930 case InstArithmetic::Sdiv:
1931 case InstArithmetic::Srem:
1932 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1933 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001934 case InstArithmetic::Fadd: {
1935 Variable *T = makeReg(Dest->getType());
1936 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001937 _addps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001938 _movp(Dest, T);
1939 } break;
1940 case InstArithmetic::Fsub: {
1941 Variable *T = makeReg(Dest->getType());
1942 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001943 _subps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001944 _movp(Dest, T);
1945 } break;
1946 case InstArithmetic::Fmul: {
1947 Variable *T = makeReg(Dest->getType());
1948 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001949 _mulps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001950 _movp(Dest, T);
1951 } break;
1952 case InstArithmetic::Fdiv: {
1953 Variable *T = makeReg(Dest->getType());
1954 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001955 _divps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001956 _movp(Dest, T);
1957 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001958 case InstArithmetic::Frem:
1959 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1960 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001961 }
Jim Stichnoth0933c0c2015-06-12 10:41:16 -07001962 return;
1963 }
1964 Variable *T_edx = nullptr;
1965 Variable *T = nullptr;
1966 switch (Inst->getOp()) {
1967 case InstArithmetic::_num:
1968 llvm_unreachable("Unknown arithmetic operator");
1969 break;
1970 case InstArithmetic::Add:
1971 _mov(T, Src0);
1972 _add(T, Src1);
1973 _mov(Dest, T);
1974 break;
1975 case InstArithmetic::And:
1976 _mov(T, Src0);
1977 _and(T, Src1);
1978 _mov(Dest, T);
1979 break;
1980 case InstArithmetic::Or:
1981 _mov(T, Src0);
1982 _or(T, Src1);
1983 _mov(Dest, T);
1984 break;
1985 case InstArithmetic::Xor:
1986 _mov(T, Src0);
1987 _xor(T, Src1);
1988 _mov(Dest, T);
1989 break;
1990 case InstArithmetic::Sub:
1991 _mov(T, Src0);
1992 _sub(T, Src1);
1993 _mov(Dest, T);
1994 break;
1995 case InstArithmetic::Mul:
1996 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1997 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1998 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001999 }
Jim Stichnoth0933c0c2015-06-12 10:41:16 -07002000 // The 8-bit version of imul only allows the form "imul r/m8"
2001 // where T must be in eax.
2002 if (isByteSizedArithType(Dest->getType())) {
2003 _mov(T, Src0, RegX8632::Reg_eax);
2004 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
2005 } else {
2006 _mov(T, Src0);
2007 }
2008 _imul(T, Src1);
2009 _mov(Dest, T);
2010 break;
2011 case InstArithmetic::Shl:
2012 _mov(T, Src0);
2013 if (!llvm::isa<Constant>(Src1))
2014 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
2015 _shl(T, Src1);
2016 _mov(Dest, T);
2017 break;
2018 case InstArithmetic::Lshr:
2019 _mov(T, Src0);
2020 if (!llvm::isa<Constant>(Src1))
2021 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
2022 _shr(T, Src1);
2023 _mov(Dest, T);
2024 break;
2025 case InstArithmetic::Ashr:
2026 _mov(T, Src0);
2027 if (!llvm::isa<Constant>(Src1))
2028 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
2029 _sar(T, Src1);
2030 _mov(Dest, T);
2031 break;
2032 case InstArithmetic::Udiv:
2033 // div and idiv are the few arithmetic operators that do not allow
2034 // immediates as the operand.
2035 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
2036 if (isByteSizedArithType(Dest->getType())) {
2037 Variable *T_ah = nullptr;
2038 Constant *Zero = Ctx->getConstantZero(IceType_i8);
2039 _mov(T, Src0, RegX8632::Reg_eax);
2040 _mov(T_ah, Zero, RegX8632::Reg_ah);
2041 _div(T, Src1, T_ah);
2042 _mov(Dest, T);
2043 } else {
2044 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2045 _mov(T, Src0, RegX8632::Reg_eax);
2046 _mov(T_edx, Zero, RegX8632::Reg_edx);
2047 _div(T, Src1, T_edx);
2048 _mov(Dest, T);
2049 }
2050 break;
2051 case InstArithmetic::Sdiv:
2052 // TODO(stichnot): Enable this after doing better performance
2053 // and cross testing.
2054 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
2055 // Optimize division by constant power of 2, but not for Om1
2056 // or O0, just to keep things simple there.
2057 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2058 int32_t Divisor = C->getValue();
2059 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2061 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2062 Type Ty = Dest->getType();
2063 // LLVM does the following for dest=src/(1<<log):
2064 // t=src
2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2066 // shr t,typewidth-log
2067 // add t,src
2068 // sar t,log
2069 // dest=t
2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
2071 _mov(T, Src0);
2072 // If for some reason we are dividing by 1, just treat it
2073 // like an assignment.
2074 if (LogDiv > 0) {
2075 // The initial sar is unnecessary when dividing by 2.
2076 if (LogDiv > 1)
2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2079 _add(T, Src0);
2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
2081 }
2082 _mov(Dest, T);
2083 return;
2084 }
2085 }
2086 }
2087 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
2088 if (isByteSizedArithType(Dest->getType())) {
2089 _mov(T, Src0, RegX8632::Reg_eax);
2090 _cbwdq(T, T);
2091 _idiv(T, Src1, T);
2092 _mov(Dest, T);
2093 } else {
2094 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
2095 _mov(T, Src0, RegX8632::Reg_eax);
2096 _cbwdq(T_edx, T);
2097 _idiv(T, Src1, T_edx);
2098 _mov(Dest, T);
2099 }
2100 break;
2101 case InstArithmetic::Urem:
2102 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
2103 if (isByteSizedArithType(Dest->getType())) {
2104 Variable *T_ah = nullptr;
2105 Constant *Zero = Ctx->getConstantZero(IceType_i8);
2106 _mov(T, Src0, RegX8632::Reg_eax);
2107 _mov(T_ah, Zero, RegX8632::Reg_ah);
2108 _div(T_ah, Src1, T);
2109 _mov(Dest, T_ah);
2110 } else {
2111 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2112 _mov(T_edx, Zero, RegX8632::Reg_edx);
2113 _mov(T, Src0, RegX8632::Reg_eax);
2114 _div(T_edx, Src1, T);
2115 _mov(Dest, T_edx);
2116 }
2117 break;
2118 case InstArithmetic::Srem:
2119 // TODO(stichnot): Enable this after doing better performance
2120 // and cross testing.
2121 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
2122 // Optimize mod by constant power of 2, but not for Om1 or O0,
2123 // just to keep things simple there.
2124 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2125 int32_t Divisor = C->getValue();
2126 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
2127 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2128 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2129 Type Ty = Dest->getType();
2130 // LLVM does the following for dest=src%(1<<log):
2131 // t=src
2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2133 // shr t,typewidth-log
2134 // add t,src
2135 // and t, -(1<<log)
2136 // sub t,src
2137 // neg t
2138 // dest=t
2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
2140 // If for some reason we are dividing by 1, just assign 0.
2141 if (LogDiv == 0) {
2142 _mov(Dest, Ctx->getConstantZero(Ty));
2143 return;
2144 }
2145 _mov(T, Src0);
2146 // The initial sar is unnecessary when dividing by 2.
2147 if (LogDiv > 1)
2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2150 _add(T, Src0);
2151 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
2152 _sub(T, Src0);
2153 _neg(T);
2154 _mov(Dest, T);
2155 return;
2156 }
2157 }
2158 }
2159 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
2160 if (isByteSizedArithType(Dest->getType())) {
2161 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
2162 _mov(T, Src0, RegX8632::Reg_eax);
2163 _cbwdq(T, T);
2164 Context.insert(InstFakeDef::create(Func, T_ah));
2165 _idiv(T_ah, Src1, T);
2166 _mov(Dest, T_ah);
2167 } else {
2168 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
2169 _mov(T, Src0, RegX8632::Reg_eax);
2170 _cbwdq(T_edx, T);
2171 _idiv(T_edx, Src1, T);
2172 _mov(Dest, T_edx);
2173 }
2174 break;
2175 case InstArithmetic::Fadd:
2176 _mov(T, Src0);
2177 _addss(T, Src1);
2178 _mov(Dest, T);
2179 break;
2180 case InstArithmetic::Fsub:
2181 _mov(T, Src0);
2182 _subss(T, Src1);
2183 _mov(Dest, T);
2184 break;
2185 case InstArithmetic::Fmul:
2186 _mov(T, Src0);
2187 _mulss(T, Src1);
2188 _mov(Dest, T);
2189 break;
2190 case InstArithmetic::Fdiv:
2191 _mov(T, Src0);
2192 _divss(T, Src1);
2193 _mov(Dest, T);
2194 break;
2195 case InstArithmetic::Frem: {
2196 const SizeT MaxSrcs = 2;
2197 Type Ty = Dest->getType();
2198 InstCall *Call = makeHelperCall(
2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
2200 Call->addArg(Src0);
2201 Call->addArg(Src1);
2202 return lowerCall(Call);
2203 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002204 }
2205}
2206
2207void TargetX8632::lowerAssign(const InstAssign *Inst) {
2208 Variable *Dest = Inst->getDest();
2209 Operand *Src0 = Inst->getSrc(0);
2210 assert(Dest->getType() == Src0->getType());
2211 if (Dest->getType() == IceType_i64) {
2212 Src0 = legalize(Src0);
2213 Operand *Src0Lo = loOperand(Src0);
2214 Operand *Src0Hi = hiOperand(Src0);
2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Jim Stichnothae953202014-12-20 06:17:49 -08002217 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002218 _mov(T_Lo, Src0Lo);
2219 _mov(DestLo, T_Lo);
2220 _mov(T_Hi, Src0Hi);
2221 _mov(DestHi, T_Hi);
2222 } else {
Jim Stichnoth5bc44312015-03-02 16:56:20 -08002223 Operand *RI;
Qining Lu253dc8a2015-06-22 10:10:23 -07002224 if (Dest->hasReg()) {
Jim Stichnoth5bc44312015-03-02 16:56:20 -08002225 // If Dest already has a physical register, then legalize the
2226 // Src operand into a Variable with the same register
2227 // assignment. This is mostly a workaround for advanced phi
2228 // lowering's ad-hoc register allocation which assumes no
2229 // register allocation is needed when at least one of the
2230 // operands is non-memory.
Qining Lu253dc8a2015-06-22 10:10:23 -07002231
2232 // If we have a physical register for the dest variable, we can
2233 // enable our constant blinding or pooling again. Note this is
2234 // only for advancedPhiLowering(), the flag flip should leave
2235 // no other side effect.
2236 {
2237 BoolFlagSaver B(RandomizationPoolingPaused, false);
2238 RI = legalize(Src0, Legal_Reg, Dest->getRegNum());
2239 }
2240 } else {
Jim Stichnoth5bc44312015-03-02 16:56:20 -08002241 // If Dest could be a stack operand, then RI must be a physical
2242 // register or a scalar integer immediate.
2243 RI = legalize(Src0, Legal_Reg | Legal_Imm);
Qining Lu253dc8a2015-06-22 10:10:23 -07002244 }
Matt Wala45a06232014-07-09 16:33:22 -07002245 if (isVectorType(Dest->getType()))
2246 _movp(Dest, RI);
2247 else
2248 _mov(Dest, RI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002249 }
2250}
2251
2252void TargetX8632::lowerBr(const InstBr *Inst) {
2253 if (Inst->isUnconditional()) {
2254 _br(Inst->getTargetUnconditional());
Jim Stichnotha59ae6f2015-05-17 10:11:41 -07002255 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002256 }
Jim Stichnotha59ae6f2015-05-17 10:11:41 -07002257 Operand *Cond = Inst->getCondition();
2258
2259 // Handle folding opportunities.
2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
2261 assert(Producer->isDeleted());
2262 switch (BoolFolding::getProducerKind(Producer)) {
2263 default:
2264 break;
2265 case BoolFolding::PK_Icmp32: {
2266 // TODO(stichnot): Refactor similarities between this block and
2267 // the corresponding code in lowerIcmp().
2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
2269 Operand *Src0 = Producer->getSrc(0);
2270 Operand *Src1 = legalize(Producer->getSrc(1));
2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2272 _cmp(Src0RM, Src1);
2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
2274 Inst->getTargetFalse());
2275 return;
2276 }
2277 }
2278 }
2279
2280 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2281 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2282 _cmp(Src0, Zero);
2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002284}
2285
// Lowers a call instruction: classifies arguments (xmm registers vs.
// stack), adjusts esp for the argument area, emits the (possibly
// sandboxed) call, and moves the return value into the destination.
void TargetX8632::lowerCall(const InstCall *Instr) {
  // x86-32 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers xmm0 - xmm3.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 4 bytes.
  //
  // This intends to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
  NeedsStackAlignment = true;

  typedef std::vector<Operand *> OperandList;
  OperandList XmmArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.  Stack arguments get an esp-relative memory
  // operand recording where the value must be stored below.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      // Vector stack arguments start at a 16-byte-aligned offset.
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
      }
      Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.  Note: this must happen after the esp adjustment
  // above, since the recorded locations are esp-relative.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Scalar integer results come back in eax.
      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
      break;
    case IceType_i64:
      // 64-bit results come back in the edx:eax pair.
      ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Vector results come back in xmm0.
      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  // Under NaCl sandboxing, the call must be the last instruction of an
  // aligned bundle, and an indirect target must first be masked down to
  // a bundle boundary.
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    if (llvm::isa<Constant>(CallTarget)) {
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
    } else {
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      // Clear the low bits so the target is bundle-aligned.
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
  Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing)
    _bundle_unlock();
  // The call only formally defines ReturnReg; mark the hi half of an
  // i64 result as defined too so liveness analysis stays correct.
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction, modeling the clobber of
  // all scratch registers by the callee.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (isScalarFloatingType(Dest->getType())) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used,
    // because st(0) still needs to be popped.
    Context.insert(InstFakeUse::create(Func, Dest));
  }
}
2482
2483void TargetX8632::lowerCast(const InstCast *Inst) {
2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
2485 InstCast::OpKind CastKind = Inst->getCastKind();
2486 Variable *Dest = Inst->getDest();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002487 switch (CastKind) {
2488 default:
2489 Func->setError("Cast type not supported");
2490 return;
Jan Voung1ee34162014-06-24 13:43:30 -07002491 case InstCast::Sext: {
2492 // Src0RM is the source operand legalized to physical register or memory,
2493 // but not immediate, since the relevant x86 native instructions don't
2494 // allow an immediate operand. If the operand is an immediate, we could
2495 // consider computing the strength-reduced result at translation time,
2496 // but we're unlikely to see something like that in the bitcode that
2497 // the optimizer wouldn't have already taken care of.
2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002499 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07002500 Type DestTy = Dest->getType();
2501 if (DestTy == IceType_v16i8) {
2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2503 Variable *OneMask = makeVectorOfOnes(Dest->getType());
2504 Variable *T = makeReg(DestTy);
2505 _movp(T, Src0RM);
2506 _pand(T, OneMask);
2507 Variable *Zeros = makeVectorOfZeros(Dest->getType());
2508 _pcmpgt(T, Zeros);
2509 _movp(Dest, T);
2510 } else {
2511 // width = width(elty) - 1; dest = (src << width) >> width
2512 SizeT ShiftAmount =
2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
Matt Wala83b80362014-07-16 10:21:30 -07002515 Variable *T = makeReg(DestTy);
2516 _movp(T, Src0RM);
2517 _psll(T, ShiftConstant);
2518 _psra(T, ShiftConstant);
2519 _movp(Dest, T);
2520 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002521 } else if (Dest->getType() == IceType_i64) {
2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002523 Constant *Shift = Ctx->getConstantInt32(31);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002524 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2525 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2526 Variable *T_Lo = makeReg(DestLo->getType());
2527 if (Src0RM->getType() == IceType_i32) {
2528 _mov(T_Lo, Src0RM);
2529 } else if (Src0RM->getType() == IceType_i1) {
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002530 _movzx(T_Lo, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002531 _shl(T_Lo, Shift);
2532 _sar(T_Lo, Shift);
2533 } else {
2534 _movsx(T_Lo, Src0RM);
2535 }
2536 _mov(DestLo, T_Lo);
Jim Stichnothae953202014-12-20 06:17:49 -08002537 Variable *T_Hi = nullptr;
Jim Stichnothdd30c812014-09-04 16:39:02 -07002538 _mov(T_Hi, T_Lo);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002539 if (Src0RM->getType() != IceType_i1)
2540 // For i1, the sar instruction is already done above.
2541 _sar(T_Hi, Shift);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002542 _mov(DestHi, T_Hi);
2543 } else if (Src0RM->getType() == IceType_i1) {
2544 // t1 = src
2545 // shl t1, dst_bitwidth - 1
2546 // sar t1, dst_bitwidth - 1
2547 // dst = t1
2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002550 Variable *T = makeReg(Dest->getType());
2551 if (typeWidthInBytes(Dest->getType()) <=
2552 typeWidthInBytes(Src0RM->getType())) {
2553 _mov(T, Src0RM);
2554 } else {
2555 // Widen the source using movsx or movzx. (It doesn't matter
2556 // which one, since the following shl/sar overwrite the bits.)
2557 _movzx(T, Src0RM);
2558 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002559 _shl(T, ShiftAmount);
2560 _sar(T, ShiftAmount);
2561 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002562 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002563 // t1 = movsx src; dst = t1
2564 Variable *T = makeReg(Dest->getType());
2565 _movsx(T, Src0RM);
2566 _mov(Dest, T);
2567 }
2568 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002569 }
2570 case InstCast::Zext: {
2571 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002572 if (isVectorType(Dest->getType())) {
Matt Wala83b80362014-07-16 10:21:30 -07002573 // onemask = materialize(1,1,...); dest = onemask & src
2574 Type DestTy = Dest->getType();
2575 Variable *OneMask = makeVectorOfOnes(DestTy);
2576 Variable *T = makeReg(DestTy);
2577 _movp(T, Src0RM);
2578 _pand(T, OneMask);
2579 _movp(Dest, T);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002580 } else if (Dest->getType() == IceType_i64) {
2581 // t1=movzx src; dst.lo=t1; dst.hi=0
2582 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2583 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2584 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2585 Variable *Tmp = makeReg(DestLo->getType());
2586 if (Src0RM->getType() == IceType_i32) {
2587 _mov(Tmp, Src0RM);
Jim Stichnothdd30c812014-09-04 16:39:02 -07002588 } else {
2589 _movzx(Tmp, Src0RM);
2590 }
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002591 if (Src0RM->getType() == IceType_i1) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002592 Constant *One = Ctx->getConstantInt32(1);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002593 _and(Tmp, One);
2594 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002595 _mov(DestLo, Tmp);
2596 _mov(DestHi, Zero);
2597 } else if (Src0RM->getType() == IceType_i1) {
2598 // t = Src0RM; t &= 1; Dest = t
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002599 Constant *One = Ctx->getConstantInt32(1);
Jan Voung39d4aca2014-10-15 15:16:54 -07002600 Type DestTy = Dest->getType();
2601 Variable *T;
2602 if (DestTy == IceType_i8) {
2603 T = makeReg(DestTy);
2604 _mov(T, Src0RM);
2605 } else {
2606 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2607 T = makeReg(IceType_i32);
2608 _movzx(T, Src0RM);
2609 }
Jim Stichnothdd30c812014-09-04 16:39:02 -07002610 _and(T, One);
2611 _mov(Dest, T);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002612 } else {
2613 // t1 = movzx src; dst = t1
2614 Variable *T = makeReg(Dest->getType());
2615 _movzx(T, Src0RM);
2616 _mov(Dest, T);
2617 }
2618 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002619 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002620 case InstCast::Trunc: {
Matt Wala83b80362014-07-16 10:21:30 -07002621 if (isVectorType(Dest->getType())) {
2622 // onemask = materialize(1,1,...); dst = src & onemask
2623 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2624 Type Src0Ty = Src0RM->getType();
2625 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2626 Variable *T = makeReg(Dest->getType());
2627 _movp(T, Src0RM);
2628 _pand(T, OneMask);
2629 _movp(Dest, T);
2630 } else {
2631 Operand *Src0 = Inst->getSrc(0);
2632 if (Src0->getType() == IceType_i64)
2633 Src0 = loOperand(Src0);
2634 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2635 // t1 = trunc Src0RM; Dest = t1
Jim Stichnothae953202014-12-20 06:17:49 -08002636 Variable *T = nullptr;
Matt Wala83b80362014-07-16 10:21:30 -07002637 _mov(T, Src0RM);
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002638 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002639 _and(T, Ctx->getConstantInt1(1));
Matt Wala83b80362014-07-16 10:21:30 -07002640 _mov(Dest, T);
2641 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002642 break;
2643 }
2644 case InstCast::Fptrunc:
2645 case InstCast::Fpext: {
Jan Voung1ee34162014-06-24 13:43:30 -07002646 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002647 // t1 = cvt Src0RM; Dest = t1
2648 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002649 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002650 _mov(Dest, T);
2651 break;
2652 }
2653 case InstCast::Fptosi:
Matt Wala83b80362014-07-16 10:21:30 -07002654 if (isVectorType(Dest->getType())) {
2655 assert(Dest->getType() == IceType_v4i32 &&
2656 Inst->getSrc(0)->getType() == IceType_v4f32);
2657 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothf79d2cb2015-03-23 15:10:54 -07002658 if (llvm::isa<OperandX8632Mem>(Src0RM))
2659 Src0RM = legalizeToVar(Src0RM);
Matt Wala83b80362014-07-16 10:21:30 -07002660 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002661 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
Matt Wala83b80362014-07-16 10:21:30 -07002662 _movp(Dest, T);
2663 } else if (Dest->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002664 // Use a helper for converting floating-point values to 64-bit
2665 // integers. SSE2 appears to have no way to convert from xmm
2666 // registers to something like the edx:eax register pair, and
2667 // gcc and clang both want to use x87 instructions complete with
2668 // temporary manipulation of the status word. This helper is
2669 // not needed for x86-64.
2670 split64(Dest);
2671 const SizeT MaxSrcs = 1;
2672 Type SrcType = Inst->getSrc(0)->getType();
Jim Stichnothc4508792015-03-01 23:12:55 -08002673 InstCall *Call =
2674 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2675 : H_fptosi_f64_i64,
2676 Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002677 Call->addArg(Inst->getSrc(0));
2678 lowerCall(Call);
2679 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002680 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002681 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2682 Variable *T_1 = makeReg(IceType_i32);
2683 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002684 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002685 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002686 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002687 _and(T_2, Ctx->getConstantInt1(1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002688 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002689 }
2690 break;
2691 case InstCast::Fptoui:
Matt Wala83b80362014-07-16 10:21:30 -07002692 if (isVectorType(Dest->getType())) {
2693 assert(Dest->getType() == IceType_v4i32 &&
2694 Inst->getSrc(0)->getType() == IceType_v4f32);
2695 const SizeT MaxSrcs = 1;
Jim Stichnothc4508792015-03-01 23:12:55 -08002696 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
Matt Wala83b80362014-07-16 10:21:30 -07002697 Call->addArg(Inst->getSrc(0));
2698 lowerCall(Call);
2699 } else if (Dest->getType() == IceType_i64 ||
2700 Dest->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002701 // Use a helper for both x86-32 and x86-64.
2702 split64(Dest);
2703 const SizeT MaxSrcs = 1;
2704 Type DestType = Dest->getType();
Jan Voung1ee34162014-06-24 13:43:30 -07002705 Type SrcType = Inst->getSrc(0)->getType();
Jim Stichnothc4508792015-03-01 23:12:55 -08002706 IceString TargetString;
2707 if (isInt32Asserting32Or64(DestType)) {
2708 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2709 : H_fptoui_f64_i32;
2710 } else {
2711 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2712 : H_fptoui_f64_i64;
2713 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002714 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2715 Call->addArg(Inst->getSrc(0));
2716 lowerCall(Call);
2717 return;
2718 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002719 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002720 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2721 Variable *T_1 = makeReg(IceType_i32);
2722 Variable *T_2 = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002723 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002724 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnoth3ef786f2014-09-08 11:19:21 -07002725 if (Dest->getType() == IceType_i1)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002726 _and(T_2, Ctx->getConstantInt1(1));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002727 _mov(Dest, T_2);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002728 }
2729 break;
2730 case InstCast::Sitofp:
Matt Wala83b80362014-07-16 10:21:30 -07002731 if (isVectorType(Dest->getType())) {
2732 assert(Dest->getType() == IceType_v4f32 &&
2733 Inst->getSrc(0)->getType() == IceType_v4i32);
2734 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothf79d2cb2015-03-23 15:10:54 -07002735 if (llvm::isa<OperandX8632Mem>(Src0RM))
2736 Src0RM = legalizeToVar(Src0RM);
Matt Wala83b80362014-07-16 10:21:30 -07002737 Variable *T = makeReg(Dest->getType());
Jan Voung699bf022014-10-08 13:52:10 -07002738 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
Matt Wala83b80362014-07-16 10:21:30 -07002739 _movp(Dest, T);
2740 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002741 // Use a helper for x86-32.
2742 const SizeT MaxSrcs = 1;
2743 Type DestType = Dest->getType();
Jim Stichnothc4508792015-03-01 23:12:55 -08002744 InstCall *Call =
2745 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2746 : H_sitofp_i64_f64,
2747 Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002748 // TODO: Call the correct compiler-rt helper function.
2749 Call->addArg(Inst->getSrc(0));
2750 lowerCall(Call);
2751 return;
2752 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002753 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002754 // Sign-extend the operand.
2755 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2756 Variable *T_1 = makeReg(IceType_i32);
2757 Variable *T_2 = makeReg(Dest->getType());
2758 if (Src0RM->getType() == IceType_i32)
2759 _mov(T_1, Src0RM);
2760 else
2761 _movsx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002762 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002763 _mov(Dest, T_2);
2764 }
2765 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002766 case InstCast::Uitofp: {
2767 Operand *Src0 = Inst->getSrc(0);
Matt Wala83b80362014-07-16 10:21:30 -07002768 if (isVectorType(Src0->getType())) {
2769 assert(Dest->getType() == IceType_v4f32 &&
2770 Src0->getType() == IceType_v4i32);
2771 const SizeT MaxSrcs = 1;
Jim Stichnothc4508792015-03-01 23:12:55 -08002772 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
Matt Wala83b80362014-07-16 10:21:30 -07002773 Call->addArg(Src0);
2774 lowerCall(Call);
2775 } else if (Src0->getType() == IceType_i64 ||
2776 Src0->getType() == IceType_i32) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002777 // Use a helper for x86-32 and x86-64. Also use a helper for
2778 // i32 on x86-32.
2779 const SizeT MaxSrcs = 1;
2780 Type DestType = Dest->getType();
Jim Stichnothc4508792015-03-01 23:12:55 -08002781 IceString TargetString;
2782 if (isInt32Asserting32Or64(Src0->getType())) {
2783 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2784 : H_uitofp_i32_f64;
2785 } else {
2786 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2787 : H_uitofp_i64_f64;
2788 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002789 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
Jan Voung1ee34162014-06-24 13:43:30 -07002790 Call->addArg(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002791 lowerCall(Call);
2792 return;
2793 } else {
Jan Voung1ee34162014-06-24 13:43:30 -07002794 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002795 // Zero-extend the operand.
2796 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2797 Variable *T_1 = makeReg(IceType_i32);
2798 Variable *T_2 = makeReg(Dest->getType());
2799 if (Src0RM->getType() == IceType_i32)
2800 _mov(T_1, Src0RM);
2801 else
2802 _movzx(T_1, Src0RM);
Jan Voung699bf022014-10-08 13:52:10 -07002803 _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002804 _mov(Dest, T_2);
2805 }
2806 break;
Jan Voung1ee34162014-06-24 13:43:30 -07002807 }
2808 case InstCast::Bitcast: {
2809 Operand *Src0 = Inst->getSrc(0);
2810 if (Dest->getType() == Src0->getType()) {
2811 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002812 lowerAssign(Assign);
2813 return;
2814 }
2815 switch (Dest->getType()) {
2816 default:
2817 llvm_unreachable("Unexpected Bitcast dest type");
Matt Wala83b80362014-07-16 10:21:30 -07002818 case IceType_i8: {
2819 assert(Src0->getType() == IceType_v8i1);
Jim Stichnothc4508792015-03-01 23:12:55 -08002820 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
Matt Wala83b80362014-07-16 10:21:30 -07002821 Call->addArg(Src0);
2822 lowerCall(Call);
2823 } break;
2824 case IceType_i16: {
2825 assert(Src0->getType() == IceType_v16i1);
Jim Stichnothc4508792015-03-01 23:12:55 -08002826 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
Matt Wala83b80362014-07-16 10:21:30 -07002827 Call->addArg(Src0);
2828 lowerCall(Call);
2829 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002830 case IceType_i32:
2831 case IceType_f32: {
Jan Voung1ee34162014-06-24 13:43:30 -07002832 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002833 Type DestType = Dest->getType();
2834 Type SrcType = Src0RM->getType();
Jim Stichnoth6e992142014-07-30 14:45:20 -07002835 (void)DestType;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002836 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2837 (DestType == IceType_f32 && SrcType == IceType_i32));
2838 // a.i32 = bitcast b.f32 ==>
2839 // t.f32 = b.f32
2840 // s.f32 = spill t.f32
2841 // a.i32 = s.f32
Jim Stichnothae953202014-12-20 06:17:49 -08002842 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002843 // TODO: Should be able to force a spill setup by calling legalize() with
2844 // Legal_Mem and not Legal_Reg or Legal_Imm.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002845 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002846 SpillVar->setLinkedTo(Dest);
2847 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002848 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002849 _mov(T, Src0RM);
2850 _mov(Spill, T);
2851 _mov(Dest, Spill);
2852 } break;
2853 case IceType_i64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002854 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002855 assert(Src0RM->getType() == IceType_f64);
2856 // a.i64 = bitcast b.f64 ==>
2857 // s.f64 = spill b.f64
2858 // t_lo.i32 = lo(s.f64)
2859 // a_lo.i32 = t_lo.i32
2860 // t_hi.i32 = hi(s.f64)
2861 // a_hi.i32 = t_hi.i32
Jim Stichnothc77f8172015-05-31 23:34:44 -07002862 Operand *SpillLo, *SpillHi;
2863 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2864 SpillVariable *SpillVar =
2865 Func->makeVariable<SpillVariable>(IceType_f64);
2866 SpillVar->setLinkedTo(Src0Var);
2867 Variable *Spill = SpillVar;
2868 Spill->setWeight(RegWeight::Zero);
2869 _movq(Spill, Src0RM);
2870 SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
2871 SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
2872 } else {
2873 SpillLo = loOperand(Src0RM);
2874 SpillHi = hiOperand(Src0RM);
2875 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002876
2877 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2878 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2879 Variable *T_Lo = makeReg(IceType_i32);
2880 Variable *T_Hi = makeReg(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002881
2882 _mov(T_Lo, SpillLo);
2883 _mov(DestLo, T_Lo);
2884 _mov(T_Hi, SpillHi);
2885 _mov(DestHi, T_Hi);
2886 } break;
2887 case IceType_f64: {
Jan Voung1ee34162014-06-24 13:43:30 -07002888 Src0 = legalize(Src0);
2889 assert(Src0->getType() == IceType_i64);
Jim Stichnothc77f8172015-05-31 23:34:44 -07002890 if (llvm::isa<OperandX8632Mem>(Src0)) {
2891 Variable *T = Func->makeVariable(Dest->getType());
2892 _movq(T, Src0);
2893 _movq(Dest, T);
2894 break;
2895 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002896 // a.f64 = bitcast b.i64 ==>
2897 // t_lo.i32 = b_lo.i32
Jan Voung1ee34162014-06-24 13:43:30 -07002898 // FakeDef(s.f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002899 // lo(s.f64) = t_lo.i32
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002900 // t_hi.i32 = b_hi.i32
2901 // hi(s.f64) = t_hi.i32
2902 // a.f64 = s.f64
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002903 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
Jim Stichnoth800dab22014-09-20 12:25:02 -07002904 SpillVar->setLinkedTo(Dest);
2905 Variable *Spill = SpillVar;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002906 Spill->setWeight(RegWeight::Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002907
Jim Stichnothae953202014-12-20 06:17:49 -08002908 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002909 VariableSplit *SpillLo =
2910 VariableSplit::create(Func, Spill, VariableSplit::Low);
2911 VariableSplit *SpillHi =
2912 VariableSplit::create(Func, Spill, VariableSplit::High);
Jan Voung1ee34162014-06-24 13:43:30 -07002913 _mov(T_Lo, loOperand(Src0));
2914 // Technically, the Spill is defined after the _store happens, but
2915 // SpillLo is considered a "use" of Spill so define Spill before it
2916 // is used.
2917 Context.insert(InstFakeDef::create(Func, Spill));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002918 _store(T_Lo, SpillLo);
Jan Voung1ee34162014-06-24 13:43:30 -07002919 _mov(T_Hi, hiOperand(Src0));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002920 _store(T_Hi, SpillHi);
Jan Voung5cd240d2014-06-25 10:36:46 -07002921 _movq(Dest, Spill);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002922 } break;
Matt Wala83b80362014-07-16 10:21:30 -07002923 case IceType_v8i1: {
2924 assert(Src0->getType() == IceType_i8);
Jim Stichnothc4508792015-03-01 23:12:55 -08002925 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002926 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002927 // Arguments to functions are required to be at least 32 bits wide.
2928 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2929 Call->addArg(Src0AsI32);
2930 lowerCall(Call);
2931 } break;
2932 case IceType_v16i1: {
2933 assert(Src0->getType() == IceType_i16);
Jim Stichnothc4508792015-03-01 23:12:55 -08002934 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002935 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
Matt Wala83b80362014-07-16 10:21:30 -07002936 // Arguments to functions are required to be at least 32 bits wide.
2937 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2938 Call->addArg(Src0AsI32);
2939 lowerCall(Call);
2940 } break;
2941 case IceType_v8i16:
2942 case IceType_v16i8:
2943 case IceType_v4i32:
2944 case IceType_v4f32: {
2945 _movp(Dest, legalizeToVar(Src0));
2946 } break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002947 }
2948 break;
2949 }
Jan Voung1ee34162014-06-24 13:43:30 -07002950 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002951}
2952
// Lowers an extractelement instruction: pull one scalar lane out of a
// vector operand and copy it into the (scalar) destination.  Three
// strategies are used, chosen by vector type and instruction set:
//   1. A direct pextrb/pextrw/pextrd when available.
//   2. pshufd + movd/movss for 4-element vectors without pextr support.
//   3. A spill to a stack slot plus a scalar load from the element's
//      memory location (the general fallback for v16i8/v16i1).
void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  // The "in-vector" element type may be wider than the nominal element
  // type (e.g. i1 lanes are represented by wider in-register lanes), so
  // the extraction itself produces an InVectorElementTy value which is
  // truncated afterwards if needed.
  Type InVectorElementTy = getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  // pextrw exists in SSE2 for v8i16/v8i1; the byte/dword forms require
  // SSE4.1, hence the InstructionSet check.
  bool CanUsePextr =
      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Strategy 1: use pextrb, pextrw, or pextrd with an immediate lane
    // index.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Strategy 2: use pshufd to rotate the desired lane into position 0,
    // then movd (integer) or movss (float) to extract it.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToVar(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination.  _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Strategy 3: spill the value to a stack slot and do the extraction
    // in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    // Zero weight forces the register allocator to give Slot a stack
    // home rather than a register.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}
3028
// Lowers a floating-point compare.  Vector compares are lowered with
// cmpps using predicates from TableFcmp (with special two-compare
// sequences for One/Ueq, which have no single cmpps predicate); scalar
// compares use ucomiss plus either a single setcc or a branch-and-mov
// sequence, driven by the per-condition C1/C2/Default fields of
// TableFcmp.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    // The condition enum value doubles as the row index into TableFcmp.
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    // Some conditions are implemented as the swapped-operand form of
    // another cmpps predicate (per the table's SwapVectorOperands flag).
    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = nullptr;

    if (Condition == InstFcmp::True) {
      // "Always true" needs no compare: produce all-ones lanes.
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      // "Always false" needs no compare: produce all-zero lanes.
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);

      switch (Condition) {
      default: {
        // Single-predicate case: one movp + cmpps does the job.
        CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
        assert(Predicate != CondX86::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered: neq AND ord.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check both equal or unordered: eq OR unord.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    // A following sext of the i1 vector result is redundant with the
    // all-ones/all-zeros lanes cmpps produces, so drop it.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Scalar lowering of a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  //
  // setcc lowering when C1 != Br_None && C2 == Br_None:
  //   ucomiss b, c       /* but swap b,c order if SwapOperands==true */
  //   setcc a, C1
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands)
    std::swap(Src0, Src1);
  // C1/C2 == Br_None encode conditions needing zero, one, or two
  // conditional branches after the compare.
  bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = nullptr;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
    if (!HasC2) {
      // Single-condition case: a setcc computes the result directly,
      // avoiding the branch sequence entirely.
      assert(TableFcmp[Index].Default);
      _setcc(Dest, TableFcmp[Index].C1);
      return;
    }
  }
  Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // Overwrite with the non-default value on the fall-through path;
    // _mov_nonkillable keeps the earlier def of Dest live.
    Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}
3139
// Lowers an integer compare.  Vector compares use SSE2's
// pcmpeq/pcmpgt (deriving the other conditions by operand swapping
// and/or an all-ones pxor to negate); i64 scalar compares branch on the
// high then low halves via TableIcmp64; 32-bit-and-smaller scalars use
// cmp + setcc.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      // Sign-extend each i1 lane so the full-width compares below
      // operate on the promoted vectors.
      Variable *NewSrc0 = Func->makeVariable(NewTy);
      Variable *NewSrc1 = Func->makeVariable(NewTy);
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations.  Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    // Each case below forces the second pcmp operand into a register
    // when it is a memory operand, then builds the condition from
    // pcmpeq/pcmpgt; Ne/Uge/Sge/Ule/Sle negate via pxor with all-ones,
    // and the Lt/Le/Ge forms swap which source feeds the pcmpgt.
    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
    } break;
    case InstIcmp::Ne: {
      // Ne == !(Src0RM == Src1RM)
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1RM > Src0RM)
      if (llvm::isa<OperandX8632Mem>(Src0RM))
        Src0RM = legalizeToVar(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      // Lt == (Src1RM > Src0RM)
      if (llvm::isa<OperandX8632Mem>(Src0RM))
        Src0RM = legalizeToVar(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0RM > Src1RM)
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }

    _movp(Dest, T);
    // A following sext of the i1 vector result is redundant with the
    // all-ones/all-zeros lanes the pcmp sequence produces, so drop it.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    Constant *Zero = Ctx->getConstantZero(IceType_i32);
    Constant *One = Ctx->getConstantInt32(1);
    InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
    InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
    // Compare the high halves first; only if they don't decide the
    // result (neither C1 nor C2 branch taken) compare the low halves
    // with condition C3.  Dest is preset to 1 (the "true" value) and
    // overwritten with 0 on the false path.
    _mov(Dest, One);
    _cmp(Src0HiRM, Src1HiRI);
    if (TableIcmp64[Index].C1 != CondX86::Br_None)
      _br(TableIcmp64[Index].C1, LabelTrue);
    if (TableIcmp64[Index].C2 != CondX86::Br_None)
      _br(TableIcmp64[Index].C2, LabelFalse);
    _cmp(Src0LoRM, Src1LoRI);
    _br(TableIcmp64[Index].C3, LabelTrue);
    Context.insert(LabelFalse);
    // _mov_nonkillable keeps the earlier def of Dest live across the
    // join point.
    _mov_nonkillable(Dest, Zero);
    Context.insert(LabelTrue);
    return;
  }

  // cmp b, c; setcc maps the IR condition to the x86 condition code.
  Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
  _cmp(Src0RM, Src1);
  _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
}
3286
Matt Wala49889232014-07-18 12:45:09 -07003287void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Matt Walae3777672014-07-31 09:06:17 -07003288 Operand *SourceVectNotLegalized = Inst->getSrc(0);
3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
Jan Voungbc004632014-09-16 15:09:10 -07003290 ConstantInteger32 *ElementIndex =
3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
Matt Wala49889232014-07-18 12:45:09 -07003292 // Only constant indices are allowed in PNaCl IR.
3293 assert(ElementIndex);
3294 unsigned Index = ElementIndex->getValue();
Matt Walae3777672014-07-31 09:06:17 -07003295 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
Matt Wala49889232014-07-18 12:45:09 -07003296
Matt Walae3777672014-07-31 09:06:17 -07003297 Type Ty = SourceVectNotLegalized->getType();
Matt Wala49889232014-07-18 12:45:09 -07003298 Type ElementTy = typeElementType(Ty);
3299 Type InVectorElementTy = getInVectorElementType(Ty);
3300
3301 if (ElementTy == IceType_i1) {
3302 // Expand the element to the appropriate size for it to be inserted
3303 // in the vector.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003304 Variable *Expanded = Func->makeVariable(InVectorElementTy);
Matt Walae3777672014-07-31 09:06:17 -07003305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3306 ElementToInsertNotLegalized);
Matt Wala49889232014-07-18 12:45:09 -07003307 lowerCast(Cast);
Matt Walae3777672014-07-31 09:06:17 -07003308 ElementToInsertNotLegalized = Expanded;
Matt Wala49889232014-07-18 12:45:09 -07003309 }
3310
Matt Wala0a450512014-07-30 12:44:39 -07003311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
3312 // Use insertps, pinsrb, pinsrw, or pinsrd.
Matt Walae3777672014-07-31 09:06:17 -07003313 Operand *ElementRM =
3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3315 Operand *SourceVectRM =
3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Matt Wala0a450512014-07-30 12:44:39 -07003317 Variable *T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07003318 _movp(T, SourceVectRM);
Matt Wala0a450512014-07-30 12:44:39 -07003319 if (Ty == IceType_v4f32)
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
Matt Wala0a450512014-07-30 12:44:39 -07003321 else
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003322 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
Matt Wala0a450512014-07-30 12:44:39 -07003323 _movp(Inst->getDest(), T);
3324 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3325 // Use shufps or movss.
Jim Stichnothae953202014-12-20 06:17:49 -08003326 Variable *ElementR = nullptr;
Matt Walae3777672014-07-31 09:06:17 -07003327 Operand *SourceVectRM =
3328 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3329
Matt Wala49889232014-07-18 12:45:09 -07003330 if (InVectorElementTy == IceType_f32) {
Matt Walae3777672014-07-31 09:06:17 -07003331 // ElementR will be in an XMM register since it is floating point.
3332 ElementR = legalizeToVar(ElementToInsertNotLegalized);
Matt Wala49889232014-07-18 12:45:09 -07003333 } else {
3334 // Copy an integer to an XMM register.
Matt Walae3777672014-07-31 09:06:17 -07003335 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3336 ElementR = makeReg(Ty);
3337 _movd(ElementR, T);
Matt Wala49889232014-07-18 12:45:09 -07003338 }
3339
Matt Walacfe51462014-07-25 15:57:56 -07003340 if (Index == 0) {
3341 Variable *T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07003342 _movp(T, SourceVectRM);
3343 _movss(T, ElementR);
Matt Walacfe51462014-07-25 15:57:56 -07003344 _movp(Inst->getDest(), T);
3345 return;
3346 }
3347
Matt Wala49889232014-07-18 12:45:09 -07003348 // shufps treats the source and desination operands as vectors of
3349 // four doublewords. The destination's two high doublewords are
3350 // selected from the source operand and the two low doublewords are
3351 // selected from the (original value of) the destination operand.
3352 // An insertelement operation can be effected with a sequence of two
3353 // shufps operations with appropriate masks. In all cases below,
3354 // Element[0] is being inserted into SourceVectOperand. Indices are
3355 // ordered from left to right.
3356 //
Matt Walae3777672014-07-31 09:06:17 -07003357 // insertelement into index 1 (result is stored in ElementR):
3358 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
3359 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
Matt Wala49889232014-07-18 12:45:09 -07003360 //
3361 // insertelement into index 2 (result is stored in T):
Matt Walae3777672014-07-31 09:06:17 -07003362 // T := SourceVectRM
3363 // ElementR := ElementR[0, 0] T[0, 3]
3364 // T := T[0, 1] ElementR[0, 3]
Matt Wala49889232014-07-18 12:45:09 -07003365 //
3366 // insertelement into index 3 (result is stored in T):
Matt Walae3777672014-07-31 09:06:17 -07003367 // T := SourceVectRM
3368 // ElementR := ElementR[0, 0] T[0, 2]
3369 // T := T[0, 1] ElementR[3, 0]
Jim Stichnothdd842db2015-01-27 12:53:53 -08003370 const unsigned char Mask1[3] = {0, 192, 128};
3371 const unsigned char Mask2[3] = {227, 196, 52};
Matt Wala49889232014-07-18 12:45:09 -07003372
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003373 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
3374 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
Matt Wala49889232014-07-18 12:45:09 -07003375
Matt Walacfe51462014-07-25 15:57:56 -07003376 if (Index == 1) {
Matt Walae3777672014-07-31 09:06:17 -07003377 _shufps(ElementR, SourceVectRM, Mask1Constant);
3378 _shufps(ElementR, SourceVectRM, Mask2Constant);
3379 _movp(Inst->getDest(), ElementR);
Matt Wala49889232014-07-18 12:45:09 -07003380 } else {
3381 Variable *T = makeReg(Ty);
Matt Walae3777672014-07-31 09:06:17 -07003382 _movp(T, SourceVectRM);
3383 _shufps(ElementR, T, Mask1Constant);
3384 _shufps(T, ElementR, Mask2Constant);
Matt Wala49889232014-07-18 12:45:09 -07003385 _movp(Inst->getDest(), T);
3386 }
Matt Wala49889232014-07-18 12:45:09 -07003387 } else {
3388 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
3389 // Spill the value to a stack slot and perform the insertion in
3390 // memory.
Matt Wala49889232014-07-18 12:45:09 -07003391 //
Matt Walae3777672014-07-31 09:06:17 -07003392 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
Matt Wala49889232014-07-18 12:45:09 -07003393 // support for legalizing to mem is implemented.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003394 Variable *Slot = Func->makeVariable(Ty);
Matt Wala49889232014-07-18 12:45:09 -07003395 Slot->setWeight(RegWeight::Zero);
Matt Walae3777672014-07-31 09:06:17 -07003396 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
Matt Wala49889232014-07-18 12:45:09 -07003397
3398 // Compute the location of the position to insert in memory.
3399 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
3400 OperandX8632Mem *Loc =
3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Matt Walae3777672014-07-31 09:06:17 -07003402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
Matt Wala49889232014-07-18 12:45:09 -07003403
3404 Variable *T = makeReg(Ty);
3405 _movp(T, Slot);
3406 _movp(Inst->getDest(), T);
3407 }
3408}
3409
// Lowers a single intrinsic call instruction. Each supported intrinsic is
// either expanded inline into x86-32 instructions or turned into a call to
// a runtime helper (H_call_*). Invalid memory orderings or malformed
// arguments report via Func->setError() and abort lowering of this call.
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    // Args: 0=ptr, 1=expected, 2=desired, 3=success ordering,
    // 4=failure ordering.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = Instr->getArg(0);
    Operand *Expected = Instr->getArg(1);
    Operand *Desired = Instr->getArg(2);
    // First try to fuse the cmpxchg with a following icmp eq + br.
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(0)))) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstX8632Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant
    // byte size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which
        // can make 16-byte operations lock free (when used with the LOCK
        // prefix). However, that's not supported in 32-bit mode, so just
        // return 0 even for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (Dest->getType() == IceType_i64) {
      // Follow what GCC does and use a movq instead of what lowerLoad()
      // normally does (split the load into two).
      // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
      // can't happen anyway, since this is x86-32 and integer arithmetic only
      // happens on 32-bit quantities.
      Variable *T = makeReg(IceType_f64);
      OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
      _movq(T, Addr);
      // Then cast the bits back out of the XMM register to the i64 Dest.
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
      lowerCast(Cast);
      // Make sure that the atomic load isn't elided when unused.
      Context.insert(InstFakeUse::create(Func, Dest->getLo()));
      Context.insert(InstFakeUse::create(Func, Dest->getHi()));
      return;
    }
    InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
    // insert the FakeUse on the last-inserted instruction's dest.
    Context.insert(
        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
    return;
  }
  case Intrinsics::AtomicRMW:
    // Args: 0=operation code, 1=ptr, 2=value, 3=memory ordering.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    lowerAtomicRMW(
        Instr->getDest(),
        static_cast<uint32_t>(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
        Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned.
    // Given that is the case, then normal stores are atomic.
    // Add a fence after the store to make it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does
      // (split the store into two), following what GCC does.
      // Cast the bits from int -> to an xmm register first.
      Variable *T = makeReg(IceType_f64);
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    InstStore *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the
    // argument must be a register. Use rotate left for 16-bit bswap.
    if (Val->getType() == IceType_i64) {
      // i64: bswap each 32-bit half and swap the halves.
      Variable *T_Lo = legalizeToVar(loOperand(Val));
      Variable *T_Hi = legalizeToVar(hiOperand(Val));
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if (Val->getType() == IceType_i32) {
      Variable *T = legalizeToVar(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
      // i16: rol by 8 swaps the two bytes.
      Val = legalize(Val);
      Constant *Eight = Ctx->getConstantInt16(8);
      Variable *T = nullptr;
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    // Population count is lowered as a helper call (no native POPCNT here).
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
                                        ? H_call_ctpop_i32
                                        : H_call_ctpop_i64,
                                    Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zero'ing instruction is dead and gets optimized out.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (Val->getType() == IceType_i64) {
      // For leading zeros, scan the high word first (FirstVal is the
      // fallback when SecondVal -- the high word -- is zero).
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (Val->getType() == IceType_i64) {
      // For trailing zeros the operand order is mirrored vs Ctlz.
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Fabs: {
    // fabs is implemented by ANDing with a mask that clears sign bits.
    Operand *Src = legalize(Instr->getArg(0));
    Type Ty = Src->getType();
    Variable *Dest = Instr->getDest();
    Variable *T = makeVectorOfFabsMask(Ty);
    // The pand instruction operates on an m128 memory operand, so if
    // Src is an f32 or f64, we need to make sure it's in a register.
    if (isVectorType(Ty)) {
      if (llvm::isa<OperandX8632Mem>(Src))
        Src = legalizeToVar(Src);
    } else {
      Src = legalizeToVar(Src);
    }
    _pand(T, Src);
    if (isVectorType(Ty))
      _movp(Dest, T);
    else
      _mov(Dest, T);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because the PNaCl ABI requires arguments to be at least 32 bits
    // wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      // Under sandboxing, the thread pointer is read from gs:[0].
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Operand *Src =
          OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
                                  OperandX8632Mem::SegReg_GS);
      Variable *Dest = Instr->getDest();
      Variable *T = nullptr;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      // Otherwise fall back to the runtime helper.
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // Stacksave simply captures the current value of esp.
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);
    return;
  }
  case Intrinsics::Stackrestore: {
    // Use a non-killing mov so liveness of esp is preserved.
    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
    _mov_nonkillable(esp, Instr->getArg(0));
    return;
  }
  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}
3742
// Lowers an atomic compare-and-exchange. For i64 operands this emits
// lock cmpxchg8b, which requires the fixed register assignment
// edx:eax = expected and ecx:ebx = desired; for 32-bit-and-smaller types
// it emits lock cmpxchg with eax = expected. In both cases the previous
// memory value ends up in (edx:)eax and is copied into DestPrev.
void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
                                     Operand *Expected, Operand *Desired) {
  if (Expected->getType() == IceType_i64) {
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from formMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // The previous value is returned in edx:eax; split it into Dest halves.
    Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 8/16/32-bit case: lock cmpxchg compares against eax and writes the
  // previous memory value back into eax.
  Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
  _mov(T_eax, Expected);
  OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
  Variable *DesiredReg = legalizeToVar(Desired);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  _mov(DestPrev, T_eax);
}
3773
Jan Voungc820ddf2014-07-29 14:38:51 -07003774bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
3775 Operand *Expected,
3776 Operand *Desired) {
Jan Voung1f47ad02015-03-20 15:01:26 -07003777 if (Ctx->getFlags().getOptLevel() == Opt_m1)
Jan Voungc820ddf2014-07-29 14:38:51 -07003778 return false;
3779 // Peek ahead a few instructions and see how Dest is used.
3780 // It's very common to have:
3781 //
3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3783 // [%y_phi = ...] // list of phi stores
3784 // %p = icmp eq i32 %x, %expected
3785 // br i1 %p, label %l1, label %l2
3786 //
3787 // which we can optimize into:
3788 //
3789 // %x = <cmpxchg code>
3790 // [%y_phi = ...] // list of phi stores
3791 // br eq, %l1, %l2
3792 InstList::iterator I = Context.getCur();
3793 // I is currently the InstIntrinsicCall. Peek past that.
3794 // This assumes that the atomic cmpxchg has not been lowered yet,
3795 // so that the instructions seen in the scan from "Cur" is simple.
3796 assert(llvm::isa<InstIntrinsicCall>(*I));
3797 Inst *NextInst = Context.getNextInst(I);
3798 if (!NextInst)
3799 return false;
3800 // There might be phi assignments right before the compare+branch, since this
3801 // could be a backward branch for a loop. This placement of assignments is
3802 // determined by placePhiStores().
3803 std::vector<InstAssign *> PhiAssigns;
3804 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3805 if (PhiAssign->getDest() == Dest)
3806 return false;
3807 PhiAssigns.push_back(PhiAssign);
3808 NextInst = Context.getNextInst(I);
3809 if (!NextInst)
3810 return false;
3811 }
3812 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3813 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3814 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3815 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3816 return false;
3817 }
3818 NextInst = Context.getNextInst(I);
3819 if (!NextInst)
3820 return false;
3821 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3822 if (!NextBr->isUnconditional() &&
3823 NextCmp->getDest() == NextBr->getCondition() &&
3824 NextBr->isLastUse(NextCmp->getDest())) {
3825 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3826 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3827 // Lower the phi assignments now, before the branch (same placement
3828 // as before).
3829 InstAssign *PhiAssign = PhiAssigns[i];
Jan Voungc820ddf2014-07-29 14:38:51 -07003830 PhiAssign->setDeleted();
Jim Stichnoth89d79562014-08-27 13:50:03 -07003831 lowerAssign(PhiAssign);
Jan Voungc820ddf2014-07-29 14:38:51 -07003832 Context.advanceNext();
3833 }
Jan Voungbd385e42014-09-18 18:18:10 -07003834 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
Jan Voungc820ddf2014-07-29 14:38:51 -07003835 // Skip over the old compare and branch, by deleting them.
3836 NextCmp->setDeleted();
3837 NextBr->setDeleted();
3838 Context.advanceNext();
3839 Context.advanceNext();
3840 return true;
3841 }
3842 }
3843 }
3844 return false;
3845}
3846
// Lowers an atomic read-modify-write. Add/Sub on 32-bit-and-smaller types
// use lock xadd (Sub negates the value first), and Exchange uses xchg
// (implicitly locked). All 64-bit forms, plus Or/And/Xor of any width,
// fall through the switch to a cmpxchg loop built by
// expandAtomicRMWAsCmpxchg, with Op_Lo/Op_Hi naming the per-word binary
// ops to apply inside the loop (both nullptr for 64-bit Exchange).
void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                 Operand *Ptr, Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = nullptr;
  LowerBinOp Op_Hi = nullptr;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      NeedsCmpxchg = true;
      Op_Lo = &TargetX8632::_add;
      Op_Hi = &TargetX8632::_adc;
      break;
    }
    // 32-bit-and-smaller add: lock xadd returns the old value in T.
    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      Op_Lo = &TargetX8632::_sub;
      Op_Hi = &TargetX8632::_sbb;
      break;
    }
    // Subtraction is done as xadd of the negated value.
    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_or;
    Op_Hi = &TargetX8632::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_and;
    Op_Hi = &TargetX8632::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX8632::_xor;
    Op_Hi = &TargetX8632::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = nullptr;
      Op_Hi = nullptr;
      break;
    }
    // 32-bit-and-smaller exchange: xchg is implicitly locked.
    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
    Variable *T = nullptr;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
3930
// Emits a lock cmpxchg / cmpxchg8b retry loop that implements an atomic
// RMW whose operation cannot be expressed with a single locked instruction.
// Op_Lo/Op_Hi are the member-function binary ops applied to the low/high
// words inside the loop; both nullptr means a plain exchange (the value is
// just loaded into ebx/ecx). FakeUse instructions are inserted after the
// loop so the register allocator keeps loop-reused values (Val and the
// address base) live across the backward branch.
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  // mov eax, [ptr]
  // mov edx, [ptr + 4]
  // .LABEL:
  // mov ebx, eax
  // <Op_Lo> ebx, <desired_adj_lo>
  // mov ecx, edx
  // <Op_Hi> ecx, <desired_adj_hi>
  // lock cmpxchg8b [ptr]
  // jne .LABEL
  // mov <dest_lo>, eax
  // mov <dest_hi>, edx
  //
  // For 32-bit:
  // mov eax, [ptr]
  // .LABEL:
  // mov <reg>, eax
  // op <reg>, [desired_adj]
  // lock cmpxchg [ptr], <reg>
  // jne .LABEL
  // mov <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    // cmpxchg8b pins edx:eax (compare value / old value) and ecx:ebx
    // (replacement value).
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      // Recompute the replacement value from the freshly-read old value
      // on each iteration, so the ops live inside the loop.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(CondX86::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base (if any) is also reused in the loop.
    if (Variable *Base = Addr->getBase())
      Context.insert(InstFakeUse::create(Func, Base));
    // cmpxchg8b leaves the old value in edx:eax; copy it into Dest.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 8/16/32-bit loop: lock cmpxchg compares against eax and reloads eax
  // with the current memory value on failure.
  OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(CondX86::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base (if any) is also reused in the loop.
  if (Variable *Base = Addr->getBase())
    Context.insert(InstFakeUse::create(Func, Base));
  _mov(Dest, T_eax);
}
4033
// Lowers count {trailing, leading} zeros intrinsic.
//
// We could do constant folding here, but that should have
// been done by the front-end/middle-end optimizations.
//
// For Ty==i64, FirstVal and SecondVal are the two 32-bit halves of the
// operand; the code below speculatively computes the answer from FirstVal
// and then uses a test/cmov on SecondVal to select the correct 64-bit
// result (the half ordering is chosen by the caller — TODO confirm).
void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
                                  Operand *FirstVal, Operand *SecondVal) {
  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
  // Then the instructions will handle the Val == 0 case much more simply
  // and won't require conversion from bit position to number of zeros.
  //
  // Otherwise:
  //   bsr IF_NOT_ZERO, Val
  //   mov T_DEST, 63
  //   cmovne T_DEST, IF_NOT_ZERO
  //   xor T_DEST, 31
  //   mov DEST, T_DEST
  //
  // NOTE: T_DEST must be a register because cmov requires its dest to be a
  // register. Also, bsf and bsr require their dest to be a register.
  //
  // The xor DEST, 31 converts a bit position to # of leading zeroes.
  // E.g., for 000... 00001100, bsr will say that the most significant bit
  // set is at position 3, while the number of leading zeros is 28. Xor is
  // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
  //
  // Similar for 64-bit, but start w/ speculating that the upper 32 bits
  // are all zero, and compute the result for that case (checking the lower
  // 32 bits). Then actually compute the result for the upper bits and
  // cmov in the result from the lower computation if the earlier speculation
  // was correct.
  //
  // Cttz, is similar, but uses bsf instead, and doesn't require the xor
  // bit position conversion, and the speculation is reversed.
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
  // bsf/bsr leave their destination undefined when the source is zero, so
  // first load the "all zeros" fallback and cmov in T only when nonzero.
  if (Cttz) {
    _bsf(T, FirstValRM);
  } else {
    _bsr(T, FirstValRM);
  }
  Variable *T_Dest = makeReg(IceType_i32);
  Constant *ThirtyTwo = Ctx->getConstantInt32(32);
  Constant *ThirtyOne = Ctx->getConstantInt32(31);
  if (Cttz) {
    _mov(T_Dest, ThirtyTwo);
  } else {
    Constant *SixtyThree = Ctx->getConstantInt32(63);
    _mov(T_Dest, SixtyThree);
  }
  _cmov(T_Dest, T, CondX86::Br_ne);
  if (!Cttz) {
    // Convert the bsr bit position into a leading-zero count (see above).
    _xor(T_Dest, ThirtyOne);
  }
  if (Ty == IceType_i32) {
    _mov(Dest, T_Dest);
    return;
  }
  // 64-bit case: T_Dest currently holds the count assuming SecondVal==0;
  // bias it by 32, compute the count from SecondVal, and select between the
  // two based on whether SecondVal was actually zero.
  _add(T_Dest, ThirtyTwo);
  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  // Will be using "test" on this, so we need a registerized variable.
  Variable *SecondVar = legalizeToVar(SecondVal);
  Variable *T_Dest2 = makeReg(IceType_i32);
  if (Cttz) {
    _bsf(T_Dest2, SecondVar);
  } else {
    _bsr(T_Dest2, SecondVar);
    _xor(T_Dest2, ThirtyOne);
  }
  _test(SecondVar, SecondVar);
  _cmov(T_Dest2, T_Dest, CondX86::Br_e);
  _mov(DestLo, T_Dest2);
  // The count of a 64-bit value fits in 32 bits, so the high half is 0.
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
4109
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004110namespace {
4111
4112bool isAdd(const Inst *Inst) {
4113 if (const InstArithmetic *Arith =
4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
4115 return (Arith->getOp() == InstArithmetic::Add);
4116 }
4117 return false;
4118}
4119
Jim Stichnoth89d79562014-08-27 13:50:03 -07004120void dumpAddressOpt(const Cfg *Func, const Variable *Base,
4121 const Variable *Index, uint16_t Shift, int32_t Offset,
4122 const Inst *Reason) {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08004123 if (!ALLOW_DUMP)
4124 return;
Jim Stichnothfa4efea2015-01-27 05:06:03 -08004125 if (!Func->isVerbose(IceV_AddrOpt))
Jim Stichnoth89d79562014-08-27 13:50:03 -07004126 return;
Jim Stichnothe4a8f402015-01-20 12:52:51 -08004127 OstreamLocker L(Func->getContext());
Jim Stichnoth89d79562014-08-27 13:50:03 -07004128 Ostream &Str = Func->getContext()->getStrDump();
4129 Str << "Instruction: ";
4130 Reason->dumpDecorated(Func);
4131 Str << " results in Base=";
4132 if (Base)
4133 Base->dump(Func);
4134 else
4135 Str << "<null>";
4136 Str << ", Index=";
4137 if (Index)
4138 Index->dump(Func);
4139 else
4140 Str << "<null>";
4141 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
4142}
4143
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004144bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
4145 const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004146 // Var originates from Var=SrcVar ==>
4147 // set Var:=SrcVar
Jim Stichnothae953202014-12-20 06:17:49 -08004148 if (Var == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004149 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07004150 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
4151 assert(!VMetadata->isMultiDef(Var));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004152 if (llvm::isa<InstAssign>(VarAssign)) {
4153 Operand *SrcOp = VarAssign->getSrc(0);
4154 assert(SrcOp);
4155 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004156 if (!VMetadata->isMultiDef(SrcVar) &&
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004157 // TODO: ensure SrcVar stays single-BB
4158 true) {
4159 Var = SrcVar;
4160 Reason = VarAssign;
4161 return true;
4162 }
4163 }
4164 }
4165 }
4166 return false;
4167}
4168
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004169bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
4170 Variable *&Index, uint16_t &Shift,
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004171 const Inst *&Reason) {
Jim Stichnothae953202014-12-20 06:17:49 -08004172 // Index==nullptr && Base is Base=Var1+Var2 ==>
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004173 // set Base=Var1, Index=Var2, Shift=0
Jim Stichnothae953202014-12-20 06:17:49 -08004174 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004175 return false;
Jim Stichnothae953202014-12-20 06:17:49 -08004176 if (Index != nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004177 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07004178 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08004179 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004180 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07004181 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004182 if (BaseInst->getSrcSize() < 2)
4183 return false;
4184 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004185 if (VMetadata->isMultiDef(Var1))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004186 return false;
4187 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004188 if (VMetadata->isMultiDef(Var2))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004189 return false;
4190 if (isAdd(BaseInst) &&
4191 // TODO: ensure Var1 and Var2 stay single-BB
4192 true) {
4193 Base = Var1;
4194 Index = Var2;
4195 Shift = 0; // should already have been 0
4196 Reason = BaseInst;
4197 return true;
4198 }
4199 }
4200 }
4201 return false;
4202}
4203
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004204bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
4205 uint16_t &Shift, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004206 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
4207 // Index=Var, Shift+=log2(Const)
Jim Stichnothae953202014-12-20 06:17:49 -08004208 if (Index == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004209 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07004210 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
Jim Stichnothae953202014-12-20 06:17:49 -08004211 if (IndexInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004212 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07004213 assert(!VMetadata->isMultiDef(Index));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004214 if (IndexInst->getSrcSize() < 2)
4215 return false;
4216 if (const InstArithmetic *ArithInst =
4217 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
4218 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jan Voungbc004632014-09-16 15:09:10 -07004219 if (ConstantInteger32 *Const =
4220 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004221 if (ArithInst->getOp() == InstArithmetic::Mul &&
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004222 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004223 uint64_t Mult = Const->getValue();
4224 uint32_t LogMult;
4225 switch (Mult) {
4226 case 1:
4227 LogMult = 0;
4228 break;
4229 case 2:
4230 LogMult = 1;
4231 break;
4232 case 4:
4233 LogMult = 2;
4234 break;
4235 case 8:
4236 LogMult = 3;
4237 break;
4238 default:
4239 return false;
4240 }
4241 if (Shift + LogMult <= 3) {
4242 Index = Var;
4243 Shift += LogMult;
4244 Reason = IndexInst;
4245 return true;
4246 }
4247 }
4248 }
4249 }
4250 }
4251 return false;
4252}
4253
// Base is Base=Var+Const || Base is Base=Const+Var ==>
//   set Base=Var, Offset+=Const
// Base is Base=Var-Const ==>
//   set Base=Var, Offset-=Const
// On success, records the defining add/sub in Reason. Fails (returning
// false) rather than folding when the combined offset would overflow.
bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
                     int32_t &Offset, const Inst *&Reason) {
  if (Base == nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
  if (BaseInst == nullptr)
    return false;
  assert(!VMetadata->isMultiDef(Base));
  if (const InstArithmetic *ArithInst =
          llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
    if (ArithInst->getOp() != InstArithmetic::Add &&
        ArithInst->getOp() != InstArithmetic::Sub)
      return false;
    bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
    Variable *Var = nullptr;
    ConstantInteger32 *Const = nullptr;
    // Accept Var+Const and Var-Const; Const+Var is only valid for Add
    // (Const-Var cannot be folded into a base register).
    if (Variable *VariableOperand =
            llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
      Var = VariableOperand;
      Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
    } else if (IsAdd) {
      Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
      Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
    }
    if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
      return false;
    int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
    // Don't fold if accumulating the immediate would overflow int32.
    if (Utils::WouldOverflowAdd(Offset, MoreOffset))
      return false;
    Base = Var;
    Offset += MoreOffset;
    Reason = BaseInst;
    return true;
  }
  return false;
}
4294
Jim Stichnoth89d79562014-08-27 13:50:03 -07004295void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
4296 Variable *&Index, uint16_t &Shift, int32_t &Offset) {
Jim Stichnoth800dab22014-09-20 12:25:02 -07004297 Func->resetCurrentNode();
Jim Stichnothfa4efea2015-01-27 05:06:03 -08004298 if (Func->isVerbose(IceV_AddrOpt)) {
Jim Stichnothe4a8f402015-01-20 12:52:51 -08004299 OstreamLocker L(Func->getContext());
Jim Stichnoth89d79562014-08-27 13:50:03 -07004300 Ostream &Str = Func->getContext()->getStrDump();
4301 Str << "\nStarting computeAddressOpt for instruction:\n ";
4302 Instr->dumpDecorated(Func);
4303 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004304 (void)Offset; // TODO: pattern-match for non-zero offsets.
Jim Stichnothae953202014-12-20 06:17:49 -08004305 if (Base == nullptr)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004306 return;
4307 // If the Base has more than one use or is live across multiple
4308 // blocks, then don't go further. Alternatively (?), never consider
4309 // a transformation that would change a variable that is currently
4310 // *not* live across basic block boundaries into one that *is*.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004311 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004312 return;
4313
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004314 const VariablesMetadata *VMetadata = Func->getVMetadata();
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004315 bool Continue = true;
4316 while (Continue) {
Jim Stichnothae953202014-12-20 06:17:49 -08004317 const Inst *Reason = nullptr;
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004318 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
4319 matchTransitiveAssign(VMetadata, Index, Reason) ||
4320 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
4321 matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
4322 matchOffsetBase(VMetadata, Base, Offset, Reason)) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07004323 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
4324 } else {
4325 Continue = false;
Matt Wala8835b892014-08-11 17:46:58 -07004326 }
4327
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004328 // Index is Index=Var<<Const && Const+Shift<=3 ==>
4329 // Index=Var, Shift+=Const
4330
4331 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
4332 // Index=Var, Shift+=log2(Const)
4333
4334 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
4335 // swap(Index,Base)
4336 // Similar for Base=Const*Var and Base=Var<<Const
4337
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004338 // Index is Index=Var+Const ==>
4339 // set Index=Var, Offset+=(Const<<Shift)
4340
4341 // Index is Index=Const+Var ==>
4342 // set Index=Var, Offset+=(Const<<Shift)
4343
4344 // Index is Index=Var-Const ==>
4345 // set Index=Var, Offset-=(Const<<Shift)
4346
4347 // TODO: consider overflow issues with respect to Offset.
4348 // TODO: handle symbolic constants.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004349 }
4350}
4351
4352} // anonymous namespace
4353
Jim Stichnothc77f8172015-05-31 23:34:44 -07004354void TargetX8632::lowerLoad(const InstLoad *Load) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004355 // A Load instruction can be treated the same as an Assign
4356 // instruction, after the source operand is transformed into an
4357 // OperandX8632Mem operand. Note that the address mode
4358 // optimization already creates an OperandX8632Mem operand, so it
4359 // doesn't need another level of transformation.
Jim Stichnothc77f8172015-05-31 23:34:44 -07004360 Variable *DestLoad = Load->getDest();
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07004361 Type Ty = DestLoad->getType();
4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Jim Stichnothc77f8172015-05-31 23:34:44 -07004363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004364 lowerAssign(Assign);
4365}
4366
// Tries to fold the current load's address computation into a single
// [Base + Index<<Shift + Offset] memory operand. On success, the
// original load is marked deleted and a replacement load using the
// folded address is inserted in its place.
void TargetX8632::doAddressOptLoad() {
  Inst *Inst = Context.getCur();
  Variable *Dest = Inst->getDest();
  Operand *Addr = Inst->getSrc(0);
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  // Vanilla ICE load instructions should not use the segment registers,
  // and computeAddressOpt only works at the level of Variables and Constants,
  // not other OperandX8632Mem, so there should be no mention of segment
  // registers there either.
  const OperandX8632Mem::SegmentRegisters SegmentReg =
      OperandX8632Mem::DefaultSegment;
  // Only a plain Variable address is a candidate for folding.
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
                                   Shift, SegmentReg);
    Context.insert(InstLoad::create(Func, Dest, Addr));
  }
}
4390
Matt Walac3302742014-08-15 16:21:56 -07004391void TargetX8632::randomlyInsertNop(float Probability) {
4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4393 if (RNG.getTrueWithProbability(Probability)) {
Jim Stichnothe6d24782014-12-19 05:42:24 -08004394 _nop(RNG(X86_NUM_NOP_VARIANTS));
Matt Walac3302742014-08-15 16:21:56 -07004395 }
4396}
4397
// Phi instructions are expected to have been eliminated before target
// lowering; encountering one here is an internal error.
void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}
4401
// Lowers a return, moving any return value into its ABI location:
// i64 in eax (low) / edx (high), scalar FP on the x87 stack via fld,
// vectors in xmm0, and other integers in eax.
void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
      Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
      Reg = eax;
      // edx is not the "return register" tracked in Reg, so keep it alive
      // with an explicit fake use.
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
    } else {
      _mov(Reg, Src0, RegX8632::Reg_eax);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
4431
// Lowers a select (a = cond ? b : c). Vector selects use SSE4.1 blend
// instructions when available, or a pand/pandn/por mask sequence
// otherwise. Scalar selects use cmov where the type permits, and
// explicit branch-based control flow for 8-bit and FP destinations.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
    if (InstructionSet >= SSE4_1) {
      // TODO(wala): If the condition operand is a constant, use blendps
      // or pblendw.
      //
      // Use blendvps or pblendvb to implement select.
      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
          SrcTy == IceType_v4f32) {
        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
        // blendvps selects on the sign bit of each lane, so shift the
        // condition's low bit up to bit 31.
        Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
        _movp(xmm0, ConditionRM);
        _psll(xmm0, Ctx->getConstantInt8(31));
        _movp(T, SrcFRM);
        _blendvps(T, SrcTRM, xmm0);
        _movp(Dest, T);
      } else {
        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                              : IceType_v16i8;
        // pblendvb needs a full per-byte mask, so sign-extend the i1
        // condition lanes first. Both blend variants implicitly use xmm0.
        Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
        _movp(T, SrcFRM);
        _pblendvb(T, SrcTRM, xmm0);
        _movp(Dest, T);
      }
      return;
    }
    // Lower select without SSE4.1:
    // a=d?b:c ==>
    //   if elementtype(d) != i1:
    //      d=sext(d);
    //   a=(b&d)|(c&~d);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      _movp(T, ConditionRM);
    }
    // a = (b & mask) | (c & ~mask)
    _movp(T2, T);
    _pand(T, SrcTRM);
    _pandn(T2, SrcFRM);
    _por(T, T2);
    _movp(Dest, T);

    return;
  }

  CondX86::BrCond Cond = CondX86::Br_ne;
  Operand *CmpOpnd0 = nullptr;
  Operand *CmpOpnd1 = nullptr;
  // Handle folding opportunities: if the condition was produced by a
  // 32-bit icmp whose only consumer is this select, use that compare
  // directly instead of testing the materialized i1.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32: {
      auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
      Cond = getIcmp32Mapping(Cmp->getCondition());
      CmpOpnd1 = legalize(Producer->getSrc(1));
      CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
    } break;
    }
  }
  // No folded compare: test the boolean condition against zero.
  if (CmpOpnd0 == nullptr) {
    CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
    CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  }
  assert(CmpOpnd0);
  assert(CmpOpnd1);

  _cmp(CmpOpnd0, CmpOpnd1);
  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
    // The cmov instruction doesn't allow 8-bit or FP operands, so
    // we need explicit control flow.
    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(Cond, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    _mov_nonkillable(Dest, SrcF);
    Context.insert(Label);
    return;
  }
  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
  // But if SrcT is immediate, we might be able to do better, as
  // the cmov instruction doesn't allow an immediate operand:
  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
    std::swap(SrcT, SrcF);
    Cond = InstX8632::getOppositeCondition(Cond);
  }
  if (DestTy == IceType_i64) {
    // Lower the two 32-bit halves independently; the flags set by the
    // single _cmp above are reused by both cmovs.
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF));
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF));
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }

  assert(DestTy == IceType_i16 || DestTy == IceType_i32);
  Variable *T = nullptr;
  SrcF = legalize(SrcF);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
  _cmov(T, SrcT, Cond);
  _mov(Dest, T);
}
4571
4572void TargetX8632::lowerStore(const InstStore *Inst) {
4573 Operand *Value = Inst->getData();
4574 Operand *Addr = Inst->getAddr();
Jan Voungbefd03a2015-06-02 11:03:03 -07004575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
Matt Wala105b7042014-08-11 19:56:19 -07004576 Type Ty = NewAddr->getType();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004577
Matt Wala105b7042014-08-11 19:56:19 -07004578 if (Ty == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004579 Value = legalize(Value);
Jim Stichnothad403532014-09-25 12:44:17 -07004580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
Matt Wala105b7042014-08-11 19:56:19 -07004584 } else if (isVectorType(Ty)) {
4585 _storep(legalizeToVar(Value), NewAddr);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004586 } else {
Jim Stichnothad403532014-09-25 12:44:17 -07004587 Value = legalize(Value, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004588 _store(Value, NewAddr);
4589 }
4590}
4591
// Tries to fold the current store's address computation into a single
// [Base + Index<<Shift + Offset] memory operand. On success, the
// original store is marked deleted and a replacement store using the
// folded address is inserted, carrying over any RMW beacon.
void TargetX8632::doAddressOptStore() {
  InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
  Operand *Data = Inst->getData();
  Operand *Addr = Inst->getAddr();
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  // Only a plain Variable address is a candidate for folding.
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  // Vanilla ICE store instructions should not use the segment registers,
  // and computeAddressOpt only works at the level of Variables and Constants,
  // not other OperandX8632Mem, so there should be no mention of segment
  // registers there either.
  const OperandX8632Mem::SegmentRegisters SegmentReg =
      OperandX8632Mem::DefaultSegment;
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
                                   Shift, SegmentReg);
    InstStore *NewStore = InstStore::create(Func, Data, Addr);
    // Preserve the RMW beacon so later read-modify-write optimization
    // still recognizes this store.
    if (Inst->getDest())
      NewStore->setRmwBeacon(Inst->getRmwBeacon());
    Context.insert(NewStore);
  }
}
4618
// Lower a switch instruction as a linear sequence of compare/branch
// pairs, ending with an unconditional jump to the default label.
void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    // 64-bit comparison on x86-32: compare the low and high halves
    // separately.  A case matches only when both halves are equal, so
    // jump past the high-half compare (to Label) when the low halves
    // already differ.
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (NumCases >= 2) {
      // Multiple uses: force both halves into registers up front.
      Src0Lo = legalizeToVar(Src0Lo);
      Src0Hi = legalizeToVar(Src0Hi);
    } else {
      Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
    }
    for (SizeT I = 0; I < NumCases; ++I) {
      Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
      Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
      // Local label used to skip the high-half compare on mismatch.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _cmp(Src0Lo, ValueLo);
      _br(CondX86::Br_ne, Label);
      _cmp(Src0Hi, ValueHi);
      _br(CondX86::Br_e, Inst->getLabel(I));
      Context.insert(Label);
    }
    _br(Inst->getLabelDefault());
    return;
  }
  // OK, we'll be slightly less naive by forcing Src into a physical
  // register if there are 2 or more uses.
  if (NumCases >= 2)
    Src0 = legalizeToVar(Src0);
  else
    Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
  for (SizeT I = 0; I < NumCases; ++I) {
    Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
    _cmp(Src0, Value);
    _br(CondX86::Br_e, Inst->getLabel(I));
  }

  _br(Inst->getLabelDefault());
}
4662
// Lower a vector arithmetic operation that has no direct SSE
// equivalent by scalarizing it: for each element, extract the two
// inputs, perform the scalar operation, and insert the result back
// into an accumulating vector temporary, which is finally assigned
// to Dest.
void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                      Variable *Dest, Operand *Src0,
                                      Operand *Src1) {
  assert(isVectorType(Dest->getType()));
  Type Ty = Dest->getType();
  Type ElementTy = typeElementType(Ty);
  SizeT NumElements = typeNumElements(Ty);

  // T accumulates the partially-built result, starting from undef.
  Operand *T = Ctx->getConstantUndef(Ty);
  for (SizeT I = 0; I < NumElements; ++I) {
    Constant *Index = Ctx->getConstantInt32(I);

    // Extract the next two inputs.
    Variable *Op0 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
    Variable *Op1 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));

    // Perform the arithmetic as a scalar operation.
    Variable *Res = Func->makeVariable(ElementTy);
    lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));

    // Insert the result into position.  A fresh temporary is used per
    // iteration to keep the code in SSA-like form.
    Variable *DestT = Func->makeVariable(Ty);
    lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
    T = DestT;
  }

  lowerAssign(InstAssign::create(Func, Dest, T));
}
4693
// The following pattern occurs often in lowered C and C++ code:
//
//   %cmp = fcmp/icmp pred <n x ty> %src0, %src1
//   %cmp.ext = sext <n x i1> %cmp to <n x ty>
//
// We can eliminate the sext operation by copying the result of pcmpeqd,
// pcmpgtd, or cmpps (which produce sign extended results) to the result
// of the sext operation.
//
// Peephole: if the instruction immediately following the current one
// is exactly that sext of SignExtendedResult, delete it, emit a plain
// vector move instead, and advance past it.
void TargetX8632::eliminateNextVectorSextInstruction(
    Variable *SignExtendedResult) {
  if (InstCast *NextCast =
          llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
    if (NextCast->getCastKind() == InstCast::Sext &&
        NextCast->getSrc(0) == SignExtendedResult) {
      NextCast->setDeleted();
      // The compare result is already sign-extended; a movp suffices.
      _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
      // Skip over the instruction.
      Context.advanceNext();
    }
  }
}
4715
Jim Stichnoth9738a9e2015-02-23 16:39:06 -08004716void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004717
// Lower a FakeRMW pseudo-instruction into a single x86
// read-modify-write memory instruction (e.g. "add [mem], reg/imm"),
// provided the pattern is still profitable.
void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this
  // instruction, then it must end in the modified Store instruction
  // that follows. This means that the original Store instruction is
  // still there, either because the value being stored is used beyond
  // the Store instruction, or because dead code elimination did not
  // happen. In either case, we cancel RMW lowering (and the caller
  // deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  if (Ty == IceType_i64) {
    // i64 on x86-32: operate on the lo/hi 32-bit halves, propagating
    // the carry/borrow from the low half via adc/sbb for Add/Sub.
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    OperandX8632Mem *AddrLo = llvm::cast<OperandX8632Mem>(loOperand(Addr));
    OperandX8632Mem *AddrHi = llvm::cast<OperandX8632Mem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi); // add-with-carry for the high half
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi); // subtract-with-borrow for the high half
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // i8, i16, i32
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  // Reached only for an op the switches above don't handle.
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
4791
4792void TargetX8632::lowerOther(const Inst *Instr) {
4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4794 lowerRMW(RMW);
4795 } else {
4796 TargetLowering::lowerOther(Instr);
4797 }
4798}
4799
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
// preserve integrity of liveness analysis. Undef values are also
// turned into zeroes, since loOperand() and hiOperand() don't expect
// Undef input.
void TargetX8632::prelowerPhis() {
  // Pause constant blinding or pooling; blinding or pooling will be done
  // later during phi lowering assignments.  (BoolFlagSaver restores the
  // flag when B goes out of scope.)
  BoolFlagSaver B(RandomizationPoolingPaused, true);

  CfgNode *Node = Context.getNode();
  for (Inst &I : Node->getPhis()) {
    auto Phi = llvm::dyn_cast<InstPhi>(&I);
    if (Phi->isDeleted())
      continue;
    Variable *Dest = Phi->getDest();
    if (Dest->getType() == IceType_i64) {
      // Split into two new phis over the lo/hi halves of the dest and
      // of every incoming argument.
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
      InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
      // NOTE: this inner SizeT I shadows the outer loop's Inst &I.
      for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
        Operand *Src = Phi->getSrc(I);
        CfgNode *Label = Phi->getLabel(I);
        if (llvm::isa<ConstantUndef>(Src))
          Src = Ctx->getConstantZero(Dest->getType());
        PhiLo->addArgument(loOperand(Src), Label);
        PhiHi->addArgument(hiOperand(Src), Label);
      }
      // Appending to the phi list is safe here: the new phis are i32,
      // so re-visiting them in the outer loop is a no-op.
      Node->getPhis().push_back(PhiLo);
      Node->getPhis().push_back(PhiHi);
      Phi->setDeleted();
    }
  }
}
4834
4835namespace {
4836
4837bool isMemoryOperand(const Operand *Opnd) {
4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4839 return !Var->hasReg();
Jim Stichnoth5bc44312015-03-02 16:56:20 -08004840 // We treat vector undef values the same as a memory operand,
4841 // because they do in fact need a register to materialize the vector
4842 // of zeroes into.
4843 if (llvm::isa<ConstantUndef>(Opnd))
4844 return isScalarFloatingType(Opnd->getType()) ||
4845 isVectorType(Opnd->getType());
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004846 if (llvm::isa<Constant>(Opnd))
4847 return isScalarFloatingType(Opnd->getType());
4848 return true;
4849}
4850
4851} // end of anonymous namespace
4852
// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
//
// Node is an otherwise-empty split-edge node with exactly one
// successor; Assignments is the ordered list of phi-derived
// assignments to materialize in it.
void TargetX8632::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  // Check that this is a properly initialized shell of a node.
  assert(Node->getOutEdges().size() == 1);
  assert(Node->getInsts().empty());
  assert(Node->getPhis().empty());
  CfgNode *Succ = Node->getOutEdges().front();
  getContext().init(Node);
  // Register set setup similar to regAlloc().
  RegSetMask RegInclude = RegSet_All;
  RegSetMask RegExclude = RegSet_StackPointer;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
  bool NeedsRegs = false;
  // Initialize the set of available registers to the set of what is
  // available (not live) at the beginning of the successor block,
  // minus all registers used as Dest operands in the Assignments. To
  // do this, we start off assuming all registers are available, then
  // iterate through the Assignments and remove Dest registers.
  // During this iteration, we also determine whether we will actually
  // need any extra registers for memory-to-memory copies. If so, we
  // do the actual work of removing the live-in registers from the
  // set. TODO(stichnot): This work is being repeated for every split
  // edge to the successor, so consider updating LiveIn just once
  // after all the edges are split.
  for (const Inst &I : Assignments) {
    Variable *Dest = I.getDest();
    if (Dest->hasReg()) {
      Available[Dest->getRegNum()] = false;
    } else if (isMemoryOperand(I.getSrc(0))) {
      NeedsRegs = true; // Src and Dest are both in memory
    }
  }
  if (NeedsRegs) {
    // Remove registers that are live into the successor block.
    LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
    for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
      Variable *Var = Func->getLiveness()->getVariable(i, Succ);
      if (Var->hasReg())
        Available[Var->getRegNum()] = false;
    }
  }
  // Iterate backwards through the Assignments. After lowering each
  // assignment, add Dest to the set of available registers, and
  // remove Src from the set of available registers. Iteration is
  // done backwards to enable incremental updates of the available
  // register set, and the lowered instruction numbers may be out of
  // order, but that can be worked around by renumbering the block
  // afterwards if necessary.
  for (const Inst &I : reverse_range(Assignments)) {
    Context.rewind();
    auto Assign = llvm::dyn_cast<InstAssign>(&I);
    Variable *Dest = Assign->getDest();

    // If the source operand is ConstantUndef, do not legalize it.
    // In function test_split_undef_int_vec, the advanced phi
    // lowering process will find an assignment of undefined
    // vector. This vector, as the Src here, will crash if it
    // go through legalize(). legalize() will create new variable
    // with makeVectorOfZeros(), but this new variable will be
    // assigned a stack slot. This will fail the assertion in
    // IceInstX8632.cpp:789, as XmmEmitterRegOp() complain:
    // Var->hasReg() fails. Note this failure is irrelevant to
    // randomization or pooling of constants.
    // So, we do not call legalize() to add pool label for the
    // src operands of phi assignment instructions.
    // Instead, we manually add pool label for constant float and
    // constant double values here.
    // Note going through legalize() does not affect the testing
    // results of SPEC2K and xtests.
    Operand *Src = Assign->getSrc(0);
    if (!llvm::isa<ConstantUndef>(Assign->getSrc(0))) {
      Src = legalize(Src);
    }

    Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
    // Use normal assignment lowering, except lower mem=mem specially
    // so we can register-allocate at the same time.
    if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
      lowerAssign(Assign);
    } else {
      // mem=mem: route the value through a scratch register, spilling
      // and restoring one if none is available.
      assert(Dest->getType() == Src->getType());
      const llvm::SmallBitVector &RegsForType =
          getRegisterSetForType(Dest->getType());
      llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
      Variable *SpillLoc = nullptr;
      Variable *Preg = nullptr;
      // TODO(stichnot): Opportunity for register randomization.
      int32_t RegNum = AvailRegsForType.find_first();
      bool IsVector = isVectorType(Dest->getType());
      bool NeedSpill = (RegNum == -1);
      if (NeedSpill) {
        // Pick some register to spill and update RegNum.
        // TODO(stichnot): Opportunity for register randomization.
        RegNum = RegsForType.find_first();
        Preg = getPhysicalRegister(RegNum, Dest->getType());
        SpillLoc = Func->makeVariable(Dest->getType());
        // Create a fake def of the physical register to avoid
        // liveness inconsistency problems during late-stage liveness
        // analysis (e.g. asm-verbose mode).
        Context.insert(InstFakeDef::create(Func, Preg));
        if (IsVector)
          _movp(SpillLoc, Preg);
        else
          _mov(SpillLoc, Preg);
      }
      assert(RegNum >= 0);
      if (llvm::isa<ConstantUndef>(Src))
        // Materialize an actual constant instead of undef. RegNum is
        // passed in for vector types because undef vectors are
        // lowered to vector register of zeroes.
        Src =
            legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
      Variable *Tmp = makeReg(Dest->getType(), RegNum);
      if (IsVector) {
        _movp(Tmp, Src);
        _movp(Dest, Tmp);
      } else {
        _mov(Tmp, Src);
        _mov(Dest, Tmp);
      }
      if (NeedSpill) {
        // Restore the spilled register.
        if (IsVector)
          _movp(Preg, SpillLoc);
        else
          _mov(Preg, SpillLoc);
        // Create a fake use of the physical register to keep it live
        // for late-stage liveness analysis (e.g. asm-verbose mode).
        Context.insert(InstFakeUse::create(Func, Preg));
      }
    }
    // Update register availability before moving to the previous
    // instruction on the Assignments list.
    if (Dest->hasReg())
      Available[Dest->getRegNum()] = true;
    if (SrcVar && SrcVar->hasReg())
      Available[SrcVar->getRegNum()] = false;
  }

  // Add the terminator branch instruction to the end.
  Context.setInsertPoint(Context.getEnd());
  _br(Succ);
}
4999
Matt Wala9a0168a2014-07-23 14:56:10 -07005000// There is no support for loading or emitting vector constants, so the
5001// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
5002// etc. are initialized with register operations.
5003//
5004// TODO(wala): Add limited support for vector constants so that
5005// complex initialization in registers is unnecessary.
5006
Matt Wala83b80362014-07-16 10:21:30 -07005007Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07005008 Variable *Reg = makeReg(Ty, RegNum);
5009 // Insert a FakeDef, since otherwise the live range of Reg might
5010 // be overestimated.
5011 Context.insert(InstFakeDef::create(Func, Reg));
5012 _pxor(Reg, Reg);
5013 return Reg;
5014}
5015
Matt Wala9a0168a2014-07-23 14:56:10 -07005016Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
5017 Variable *MinusOnes = makeReg(Ty, RegNum);
5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
5019 Context.insert(InstFakeDef::create(Func, MinusOnes));
5020 _pcmpeq(MinusOnes, MinusOnes);
5021 return MinusOnes;
5022}
5023
Matt Wala83b80362014-07-16 10:21:30 -07005024Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07005025 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07005026 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07005027 _psub(Dest, MinusOne);
5028 return Dest;
5029}
5030
// Materialize a vector whose lanes each have only the sign
// (high-order) bit set, e.g. <0x80000000 x 4> for v4i32/v4f32.
Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    // Shift a vector of 1s left by (element width - 1) bits so only
    // the top bit of each lane remains set.
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt8(Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8 bit integers.
    // Instead, broadcast the 32-bit 0x80808080 pattern: movd it into
    // the low dword, then pshufd with an all-zero selector to
    // replicate it across all four dwords.
    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}
5049
Jim Stichnoth8c980d02015-03-19 13:01:50 -07005050// Construct a mask in a register that can be and'ed with a
5051// floating-point value to mask off its sign bit. The value will be
5052// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
5053// for f64. Construct it as vector of ones logically right shifted
5054// one bit. TODO(stichnot): Fix the wala TODO above, to represent
5055// vector constants in memory.
5056Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) {
5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
5058 _psrl(Reg, Ctx->getConstantInt8(1));
5059 return Reg;
5060}
5061
// Build a memory operand of type Ty addressing [Slot + Offset], where
// Slot is a stack-allocated Variable.  The slot's address is computed
// at runtime with an lea into a fresh register.
OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
                                                           Variable *Slot,
                                                           uint32_t Offset) {
  // Ensure that Loc is a stack slot.
  assert(Slot->getWeight().isZero());
  assert(Slot->getRegNum() == Variable::NoRegister);
  // Compute the location of Loc in memory.
  // TODO(wala,stichnot): lea should not be required. The address of
  // the stack slot is known at compile time (although not until after
  // addProlog()).
  const Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
  return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
}
5078
Matt Wala928f1292014-07-07 16:50:46 -07005079// Helper for legalize() to emit the right code to lower an operand to a
5080// register of the appropriate type.
5081Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
5082 Type Ty = Src->getType();
5083 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07005084 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07005085 _movp(Reg, Src);
5086 } else {
5087 _mov(Reg, Src);
5088 }
5089 return Reg;
5090}
5091
// Legalize From into one of the operand kinds permitted by Allowed
// (register / immediate / memory), optionally into the specific
// physical register RegNum.  This is the central routine that makes
// operands consumable by x86 instructions.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds. (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);

  if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    if (Base != RegBase || Index != RegIndex) {
      // Rebuild the memory operand with the register-resident
      // base/index, preserving offset, shift, and segment.
      Mem =
          OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
                                  Mem->getShift(), Mem->getSegmentRegister());
    }

    // For all Memory Operands, we do randomization/pooling here
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      // Lower undefs to zero. Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      //
      // If in the future the implementation is changed to lower undef
      // values to uninitialized registers, a FakeDef will be needed:
      // Context.insert(InstFakeDef::create(Func, Reg));
      // This is in order to ensure that the live range of Reg is not
      // overestimated. If the constant being lowered is a 64 bit value,
      // then the result should be split and the lo and hi components will
      // need to go in uninitialized registers.
      if (isVectorType(Ty))
        return makeVectorOfZeros(Ty, RegNum);
      Const = Ctx->getConstantZero(Ty);
      From = Const;
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 32 bit constant integer, we should check
    // whether we need to randomize it or pool it.
    if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    // Convert a scalar floating point constant into an explicit
    // memory operand (a reference into the constant pool).
    if (isScalarFloatingType(Ty)) {
      Variable *Base = nullptr;
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      From = OperandX8632Mem::create(Func, Ty, Base, Offset);
    }
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
5204
5205// Provide a trivial wrapper to legalize() for this common usage.
Jim Stichnothad403532014-09-25 12:44:17 -07005206Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005208}
5209
Jim Stichnotha59ae6f2015-05-17 10:11:41 -07005210// For the cmp instruction, if Src1 is an immediate, or known to be a
5211// physical register, we can allow Src0 to be a memory operand.
5212// Otherwise, Src0 must be copied into a physical register.
5213// (Actually, either Src0 or Src1 can be chosen for the physical
5214// register, but unfortunately we have to commit to one or the other
5215// before register allocation.)
5216Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
5217 bool IsSrc1ImmOrReg = false;
5218 if (llvm::isa<Constant>(Src1)) {
5219 IsSrc1ImmOrReg = true;
5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5221 if (Var->hasReg())
5222 IsSrc1ImmOrReg = true;
5223 }
5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
5225}
5226
// Wrap Opnd into an OperandX8632Mem of type Ty.  Opnd may already be
// a memory operand, or a Variable (used as the base) or a Constant
// (used as the absolute/relocatable offset).
OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty,
                                                bool DoLegalize) {
  OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
  // It may be the case that address mode optimization already creates
  // an OperandX8632Mem, so in that case it wouldn't need another level
  // of transformation.
  if (!Mem) {
    Variable *Base = llvm::dyn_cast<Variable>(Opnd);
    Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the
      // constant offset; we will work on the whole memory operand as
      // one entity later, which saves one instruction.  By turning
      // blinding and pooling off, we guarantee legalize(Offset) will
      // return a Constant*.
      {
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
  }
  // Do legalization (which itself includes randomization/pooling), or
  // do just the randomization/pooling.
  return llvm::cast<OperandX8632Mem>(
      DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}
5259
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005260Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
Jan Voung1ee34162014-06-24 13:43:30 -07005261 // There aren't any 64-bit integer registers for x86-32.
5262 assert(Type != IceType_i64);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07005263 Variable *Reg = Func->makeVariable(Type);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005264 if (RegNum == Variable::NoRegister)
5265 Reg->setWeightInfinite();
5266 else
5267 Reg->setRegNum(RegNum);
5268 return Reg;
5269}
5270
5271void TargetX8632::postLower() {
Jan Voung1f47ad02015-03-20 15:01:26 -07005272 if (Ctx->getFlags().getOptLevel() == Opt_m1)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005273 return;
Jan Voungb3401d22015-05-18 09:38:21 -07005274 inferTwoAddress();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005275}
5276
// Compute a random permutation of the physical registers for register
// randomization.  Registers listed in ExcludeRegisters map to
// themselves; all others are shuffled only within their "equivalence
// class" -- the set of registers sharing the same properties
// (scratch/preserved/i8-capable/integer/FP) -- so the permuted
// assignment remains legal.
void TargetX8632::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  // TODO(stichnot): Declaring Permutation this way loses type/size
  // information.  Fix this in conjunction with the caller-side TODO.
  assert(Permutation.size() >= RegX8632::Reg_NUM);
  // Expected upper bound on the number of registers in a single
  // equivalence class.  For x86-32, this would comprise the 8 XMM
  // registers.  This is for performance, not correctness.
  static const unsigned MaxEquivalenceClassSize = 8;
  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
  EquivalenceClassMap EquivalenceClasses;
  SizeT NumShuffled = 0, NumPreserved = 0;

// Build up the equivalence classes of registers by looking at the
// register properties as well as whether the registers should be
// explicitly excluded from shuffling.  The class key packs the five
// property bits into a small integer.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (ExcludeRegisters[RegX8632::val]) {                                       \
    /* val stays the same in the resulting permutation. */                     \
    Permutation[RegX8632::val] = RegX8632::val;                                \
    ++NumPreserved;                                                            \
  } else {                                                                     \
    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
                           (isInt << 3) | (isFP << 4);                         \
    /* val is assigned to an equivalence class based on its properties. */     \
    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
  }
  REGX8632_TABLE
#undef X

  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

  // Independently shuffle each equivalence class, recording the mapping
  // from each original register to its shuffled counterpart.
  for (auto I : EquivalenceClasses) {
    const RegisterList &List = I.second;
    RegisterList Shuffled(List);
    RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
      Permutation[List[SI]] = Shuffled[SI];
      ++NumShuffled;
    }
  }

  // Every register must have been either preserved or shuffled.
  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);

  // With -verbose=random, dump the equivalence classes for debugging.
  if (Func->isVerbose(IceV_Random)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "Register equivalence classes:\n";
    for (auto I : EquivalenceClasses) {
      Str << "{";
      const RegisterList &List = I.second;
      bool First = true;
      for (int32_t Register : List) {
        if (!First)
          Str << " ";
        First = false;
        Str << getRegName(Register, IceType_i32);
      }
      Str << "}\n";
    }
  }
}
5343
Jan Voung76bb0be2015-05-14 09:26:19 -07005344void TargetX8632::emit(const ConstantInteger32 *C) const {
Jan Voungf644a4b2015-03-19 11:57:52 -07005345 if (!ALLOW_DUMP)
5346 return;
5347 Ostream &Str = Ctx->getStrEmit();
Jan Voung76bb0be2015-05-14 09:26:19 -07005348 Str << getConstantPrefix() << C->getValue();
Jan Voungf644a4b2015-03-19 11:57:52 -07005349}
5350
// 64-bit integer immediates are never emitted directly on x86-32; i64
// operations are lowered to 32-bit register pairs beforehand, so
// reaching this overload is a fatal error.
void TargetX8632::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}
5354
5355void TargetX8632::emit(const ConstantFloat *C) const {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08005356 if (!ALLOW_DUMP)
5357 return;
Matt Wala928f1292014-07-07 16:50:46 -07005358 Ostream &Str = Ctx->getStrEmit();
Jan Voung76bb0be2015-05-14 09:26:19 -07005359 C->emitPoolLabel(Str);
Jan Voungf644a4b2015-03-19 11:57:52 -07005360}
5361
Jan Voung76bb0be2015-05-14 09:26:19 -07005362void TargetX8632::emit(const ConstantDouble *C) const {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08005363 if (!ALLOW_DUMP)
5364 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07005365 Ostream &Str = Ctx->getStrEmit();
Jan Voung76bb0be2015-05-14 09:26:19 -07005366 C->emitPoolLabel(Str);
Jim Stichnothf61d5b22014-05-23 13:31:24 -07005367}
5368
// Undef values must have been legalized away before emission, so
// encountering one here is a fatal error.
void TargetX8632::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
5372
// TargetDataX8632 lowers data (global variables and constant pools), as
// opposed to TargetX8632 which lowers functions.
TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
Jim Stichnothde4ca712014-06-29 08:13:48 -07005375
John Porto8b1a7052015-06-17 13:20:08 -07005376void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
5377 const IceString &SectionSuffix) {
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005378 switch (Ctx->getFlags().getOutFileType()) {
5379 case FT_Elf: {
Jim Stichnothbbca7542015-02-11 16:08:31 -08005380 ELFObjectWriter *Writer = Ctx->getObjectWriter();
John Porto8b1a7052015-06-17 13:20:08 -07005381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005382 } break;
5383 case FT_Asm:
5384 case FT_Iasm: {
Jim Stichnothbbca7542015-02-11 16:08:31 -08005385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
5386 OstreamLocker L(Ctx);
John Porto8b1a7052015-06-17 13:20:08 -07005387 for (const VariableDeclaration *Var : Vars) {
Jim Stichnothbbca7542015-02-11 16:08:31 -08005388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
John Porto8b1a7052015-06-17 13:20:08 -07005389 emitGlobal(*Var, SectionSuffix);
Jim Stichnothbbca7542015-02-11 16:08:31 -08005390 }
5391 }
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005392 } break;
Jim Stichnothbbca7542015-02-11 16:08:31 -08005393 }
Jan Voung72984d82015-01-29 14:42:38 -08005394}
5395
// PoolTypeConverter maps a C++ primitive type onto everything needed to
// emit a constant pool for the corresponding Ice type: the pooled
// constant class (IceType), the raw integer type used for bit-exact
// printing (PrimitiveIntType), the Ice type tag, and the
// assembler-directive/printf strings used by emitConstantPool().
template <typename T> struct PoolTypeConverter {};

// Converter for f32 constant pooling.
template <> struct PoolTypeConverter<float> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

// Converter for f64 constant pooling.
template <> struct PoolTypeConverter<double> {
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";

// Converter for i32 constant pooling (integer pools are populated when
// immediate pooling is enabled; see randomizeOrPoolImmediate()).
template <> struct PoolTypeConverter<uint32_t> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantInteger32 IceType;
  static const Type Ty = IceType_i32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint32_t>::TypeName = "i32";
const char *PoolTypeConverter<uint32_t>::AsmTag = ".long";
const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x";

// Converter for i16 constant pooling.  Note the constant class is still
// ConstantInteger32; only the Ice type tag and asm directive differ.
template <> struct PoolTypeConverter<uint16_t> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantInteger32 IceType;
  static const Type Ty = IceType_i16;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint16_t>::TypeName = "i16";
const char *PoolTypeConverter<uint16_t>::AsmTag = ".short";
const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x";

// Converter for i8 constant pooling.
template <> struct PoolTypeConverter<uint8_t> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantInteger32 IceType;
  static const Type Ty = IceType_i8;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
5460
// Emit the constant pool for type T (a PoolTypeConverter instantiation)
// as textual assembly: a mergeable .rodata.cstN section followed by one
// labeled entry per pooled constant.  Only constants flagged via
// getShouldBePooled() are emitted.
template <typename T>
void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  // "aM" marks the section allocatable and mergeable, with entry size
  // equal to the alignment, letting the linker deduplicate constants.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (Constant *C : Pool) {
    if (!C->getShouldBePooled())
      continue;
    typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
    typename T::IceType::PrimType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    // Each entry is "<label>:\n\t<asm tag>\t<hex value>\t# <type> <value>".
    Const->emitPoolLabel(Str);
    Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
5493
John Porto0f86d032015-06-15 07:44:27 -07005494void TargetDataX8632::lowerConstants() {
Karl Schimpfdf80eb82015-02-09 14:20:22 -08005495 if (Ctx->getFlags().getDisableTranslation())
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005496 return;
5497 // No need to emit constants from the int pool since (for x86) they
5498 // are embedded as immediates in the instructions, just emit float/double.
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005499 switch (Ctx->getFlags().getOutFileType()) {
5500 case FT_Elf: {
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005501 ELFObjectWriter *Writer = Ctx->getObjectWriter();
Qining Lu253dc8a2015-06-22 10:10:23 -07005502
5503 Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
5504 Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
5505 Writer->writeConstantPool<ConstantInteger32>(IceType_i32);
5506
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005507 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5508 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005509 } break;
5510 case FT_Asm:
5511 case FT_Iasm: {
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005512 OstreamLocker L(Ctx);
Qining Lu253dc8a2015-06-22 10:10:23 -07005513
5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);
5517
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005518 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5519 emitConstantPool<PoolTypeConverter<double>>(Ctx);
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005520 } break;
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005521 }
5522}
5523
// TargetHeaderX8632 lowers any per-file header material (x86-32 emits
// none beyond the base class behavior).
TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
5526
Qining Lu253dc8a2015-06-22 10:10:23 -07005527// Randomize or pool an Immediate.
5528Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate,
5529 int32_t RegNum) {
5530 assert(llvm::isa<ConstantInteger32>(Immediate) ||
5531 llvm::isa<ConstantRelocatable>(Immediate));
5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5533 RandomizationPoolingPaused == true) {
5534 // Immediates randomization/pooling off or paused
5535 return Immediate;
5536 }
5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
5538 Ctx->statsUpdateRPImms();
5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
5540 RPI_Randomize) {
5541 // blind the constant
5542 // FROM:
5543 // imm
5544 // TO:
5545 // insert: mov imm+cookie, Reg
5546 // insert: lea -cookie[Reg], Reg
5547 // => Reg
5548 // If we have already assigned a phy register, we must come from
5549 // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
5550 // the assigned register as this assignment is that start of its use-def
5551 // chain. So we add RegNum argument here.
5552 // Note we use 'lea' instruction instead of 'xor' to avoid affecting
5553 // the flags.
5554 Variable *Reg = makeReg(IceType_i32, RegNum);
5555 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
5556 uint32_t Value = Integer->getValue();
5557 uint32_t Cookie = Ctx->getRandomizationCookie();
5558 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
5559 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
5560 _lea(Reg,
5561 OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0));
5562 // make sure liveness analysis won't kill this variable, otherwise a
5563 // liveness
5564 // assertion will be triggered.
5565 _set_dest_nonkillable();
5566 if (Immediate->getType() != IceType_i32) {
5567 Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
5568 _mov(TruncReg, Reg);
5569 return TruncReg;
5570 }
5571 return Reg;
5572 }
5573 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
5574 // pool the constant
5575 // FROM:
5576 // imm
5577 // TO:
5578 // insert: mov $label, Reg
5579 // => Reg
5580 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
5581 Immediate->setShouldBePooled(true);
5582 // if we have already assigned a phy register, we must come from
5583 // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
5584 // the assigned register as this assignment is that start of its use-def
5585 // chain. So we add RegNum argument here.
5586 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5587 IceString Label;
5588 llvm::raw_string_ostream Label_stream(Label);
5589 Immediate->emitPoolLabel(Label_stream);
5590 const RelocOffsetT Offset = 0;
5591 const bool SuppressMangling = true;
5592 Constant *Symbol =
5593 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5594 OperandX8632Mem *MemOperand =
5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);
5596 _mov(Reg, MemOperand);
5597 return Reg;
5598 }
5599 assert("Unsupported -randomize-pool-immediates option" && false);
5600 }
5601 // the constant Immediate is not eligible for blinding/pooling
5602 return Immediate;
5603}
5604
// Blind or pool the constant offset of a memory operand, as selected by
// the -randomize-pool-immediates option.  Returns the original
// MemOperand when randomization/pooling is off or paused, when the
// operand has already been randomized, or when its offset is not
// eligible; otherwise returns a new OperandX8632Mem whose offset is
// hidden behind a temporary register.
OperandX8632Mem *
TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
                                      int32_t RegNum) {
  assert(MemOperand);
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediate randomization/pooling is turned off or temporarily paused.
    return MemOperand;
  }

  // If this memory operand has already been randomized, do not
  // randomize it again (legalize() may run multiple times on it).
  if (MemOperand->getRandomized())
    return MemOperand;

  if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
    if (C->shouldBeRandomizedOrPooled(Ctx)) {
      // The offset of this mem operand should be blinded or pooled.
      Ctx->statsUpdateRPImms();
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
          RPI_Randomize) {
        // Blind the constant offset:
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: lea offset+cookie[base], RegTemp
        //   => -cookie[RegTemp, index, shift]
        // NOTE(review): the dyn_cast below is dereferenced unchecked; it
        // assumes an eligible offset is always a ConstantInteger32 and
        // never a ConstantRelocatable when blinding is requested --
        // confirm shouldBeRandomizedOrPooled() guarantees this.
        uint32_t Value =
            llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
                ->getValue();
        uint32_t Cookie = Ctx->getRandomizationCookie();
        Constant *Mask1 = Ctx->getConstantInt(
            MemOperand->getOffset()->getType(), Cookie + Value);
        Constant *Mask2 =
            Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

        OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(
            Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
        // If we have already assigned a physical register, we must come
        // from advancedPhiLowering()=>lowerAssign().  In this case we
        // should reuse the assigned register as this assignment is the
        // start of its use-def chain.  Hence the RegNum argument here.
        Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
        _lea(RegTemp, TempMemOperand);
        // The source operand doesn't use the dest reg, so normally no
        // _set_dest_nonkillable() is needed.  But when the same dest reg
        // is reused (RegNum assigned), it must be added.
        if (RegNum != Variable::NoRegister)
          _set_dest_nonkillable();

        OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
            Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
            MemOperand->getShift(), MemOperand->getSegmentRegister());

        // Label this memory operand as randomized, so we won't randomize
        // it again in case legalize() is called multiple times on this
        // memory operand.
        NewMemOperand->setRandomized(true);
        return NewMemOperand;
      }
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
        // Pool the constant offset:
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: mov $label, RegTemp
        //   insert: lea [base, RegTemp], RegTemp
        //   => [RegTemp, index, shift]
        assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
               RPI_Pool);
        // Memory operands should never appear as source operands in phi
        // lowering assignments, so there is no need to reuse any
        // registers here.  For phi lowering we should not ask for new
        // physical registers in general; if we do meet a memory operand
        // during phi lowering, skip blinding/pooling for now.
        if (RegNum != Variable::NoRegister)
          return MemOperand;
        Variable *RegTemp = makeReg(IceType_i32);
        IceString Label;
        llvm::raw_string_ostream Label_stream(Label);
        MemOperand->getOffset()->emitPoolLabel(Label_stream);
        MemOperand->getOffset()->setShouldBePooled(true);
        const RelocOffsetT SymOffset = 0;
        bool SuppressMangling = true;
        Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                               SuppressMangling);
        OperandX8632Mem *SymbolOperand = OperandX8632Mem::create(
            Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
        _mov(RegTemp, SymbolOperand);
        // If there is a base variable, add a lea instruction folding its
        // value into RegTemp; with no base variable the lea is not
        // needed.
        if (MemOperand->getBase()) {
          OperandX8632Mem *CalculateOperand = OperandX8632Mem::create(
              Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
              RegTemp, 0, MemOperand->getSegmentRegister());
          _lea(RegTemp, CalculateOperand);
          _set_dest_nonkillable();
        }
        OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
            Func, MemOperand->getType(), RegTemp, nullptr,
            MemOperand->getIndex(), MemOperand->getShift(),
            MemOperand->getSegmentRegister());
        return NewMemOperand;
      }
      assert("Unsupported -randomize-pool-immediates option" && false);
    }
  }
  // The offset is not eligible for blinding or pooling; return the
  // original mem operand.
  return MemOperand;
}
5719
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005720} // end of namespace Ice